diff options
60 files changed, 6142 insertions, 551 deletions
diff --git a/drivers/atm/nicstar.c b/drivers/atm/nicstar.c index ddc4ceb..700ed15 100644 --- a/drivers/atm/nicstar.c +++ b/drivers/atm/nicstar.c @@ -874,7 +874,8 @@ static scq_info *get_scq(ns_dev *card, int size, u32 scd) scq->skb = kmalloc(sizeof(struct sk_buff *) * (size / NS_SCQE_SIZE), GFP_KERNEL); if (!scq->skb) { - kfree(scq->org); + dma_free_coherent(&card->pcidev->dev, + 2 * size, scq->org, scq->dma); kfree(scq); return NULL; } diff --git a/drivers/infiniband/hw/mlx4/qp.c b/drivers/infiniband/hw/mlx4/qp.c index 8db8405..768085f 100644 --- a/drivers/infiniband/hw/mlx4/qp.c +++ b/drivers/infiniband/hw/mlx4/qp.c @@ -232,7 +232,7 @@ static void stamp_send_wqe(struct mlx4_ib_qp *qp, int n, int size) } } else { ctrl = buf = get_send_wqe(qp, n & (qp->sq.wqe_cnt - 1)); - s = (ctrl->fence_size & 0x3f) << 4; + s = (ctrl->qpn_vlan.fence_size & 0x3f) << 4; for (i = 64; i < s; i += 64) { wqe = buf + i; *wqe = cpu_to_be32(0xffffffff); @@ -264,7 +264,7 @@ static void post_nop_wqe(struct mlx4_ib_qp *qp, int n, int size) inl->byte_count = cpu_to_be32(1 << 31 | (size - s - sizeof *inl)); } ctrl->srcrb_flags = 0; - ctrl->fence_size = size / 16; + ctrl->qpn_vlan.fence_size = size / 16; /* * Make sure descriptor is fully written before setting ownership bit * (because HW can start executing as soon as we do). @@ -1992,7 +1992,8 @@ static int __mlx4_ib_modify_qp(struct ib_qp *ibqp, ctrl = get_send_wqe(qp, i); ctrl->owner_opcode = cpu_to_be32(1 << 31); if (qp->sq_max_wqes_per_wr == 1) - ctrl->fence_size = 1 << (qp->sq.wqe_shift - 4); + ctrl->qpn_vlan.fence_size = + 1 << (qp->sq.wqe_shift - 4); stamp_send_wqe(qp, i, 1 << qp->sq.wqe_shift); } @@ -3169,8 +3170,8 @@ int mlx4_ib_post_send(struct ib_qp *ibqp, struct ib_send_wr *wr, wmb(); *lso_wqe = lso_hdr_sz; - ctrl->fence_size = (wr->send_flags & IB_SEND_FENCE ? - MLX4_WQE_CTRL_FENCE : 0) | size; + ctrl->qpn_vlan.fence_size = (wr->send_flags & IB_SEND_FENCE ? 
+ MLX4_WQE_CTRL_FENCE : 0) | size; /* * Make sure descriptor is fully written before diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c index 5cb06f7..9ba2173 100644 --- a/drivers/net/dsa/mv88e6xxx/chip.c +++ b/drivers/net/dsa/mv88e6xxx/chip.c @@ -216,6 +216,32 @@ static int mv88e6xxx_write(struct mv88e6xxx_chip *chip, return 0; } +/* Indirect write to single pointer-data register with an Update bit */ +static int mv88e6xxx_update(struct mv88e6xxx_chip *chip, int addr, int reg, + u16 update) +{ + u16 val; + int i, err; + + /* Wait until the previous operation is completed */ + for (i = 0; i < 16; ++i) { + err = mv88e6xxx_read(chip, addr, reg, &val); + if (err) + return err; + + if (!(val & BIT(15))) + break; + } + + if (i == 16) + return -ETIMEDOUT; + + /* Set the Update bit to trigger a write operation */ + val = BIT(15) | update; + + return mv88e6xxx_write(chip, addr, reg, val); +} + static int _mv88e6xxx_reg_read(struct mv88e6xxx_chip *chip, int addr, int reg) { u16 val; @@ -257,68 +283,6 @@ static int mv88e6xxx_reg_write(struct mv88e6xxx_chip *chip, int addr, return ret; } -static int mv88e6xxx_set_addr_direct(struct dsa_switch *ds, u8 *addr) -{ - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - int err; - - err = mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_MAC_01, - (addr[0] << 8) | addr[1]); - if (err) - return err; - - err = mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_MAC_23, - (addr[2] << 8) | addr[3]); - if (err) - return err; - - return mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_MAC_45, - (addr[4] << 8) | addr[5]); -} - -static int mv88e6xxx_set_addr_indirect(struct dsa_switch *ds, u8 *addr) -{ - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - int ret; - int i; - - for (i = 0; i < 6; i++) { - int j; - - /* Write the MAC address byte. */ - ret = mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, - GLOBAL2_SWITCH_MAC_BUSY | - (i << 8) | addr[i]); - if (ret) - return ret; - - /* Wait for the write to complete. 
*/ - for (j = 0; j < 16; j++) { - ret = mv88e6xxx_reg_read(chip, REG_GLOBAL2, - GLOBAL2_SWITCH_MAC); - if (ret < 0) - return ret; - - if ((ret & GLOBAL2_SWITCH_MAC_BUSY) == 0) - break; - } - if (j == 16) - return -ETIMEDOUT; - } - - return 0; -} - -static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr) -{ - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - - if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_SWITCH_MAC)) - return mv88e6xxx_set_addr_indirect(ds, addr); - else - return mv88e6xxx_set_addr_direct(ds, addr); -} - static int mv88e6xxx_mdio_read_direct(struct mv88e6xxx_chip *chip, int addr, int regnum) { @@ -1460,9 +1424,6 @@ static void mv88e6xxx_port_stp_state_set(struct dsa_switch *ds, int port, int stp_state; int err; - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_PORTSTATE)) - return; - switch (state) { case BR_STATE_DISABLED: stp_state = PORT_CONTROL_STATE_DISABLED; @@ -2398,11 +2359,6 @@ static int mv88e6xxx_port_fdb_prepare(struct dsa_switch *ds, int port, const struct switchdev_obj_port_fdb *fdb, struct switchdev_trans *trans) { - struct mv88e6xxx_chip *chip = ds_to_priv(ds); - - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_ATU)) - return -EOPNOTSUPP; - /* We don't need any dynamic resource from the kernel (yet), * so skip the prepare phase. 
*/ @@ -2418,9 +2374,6 @@ static void mv88e6xxx_port_fdb_add(struct dsa_switch *ds, int port, GLOBAL_ATU_DATA_STATE_UC_STATIC; struct mv88e6xxx_chip *chip = ds_to_priv(ds); - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_ATU)) - return; - mutex_lock(&chip->reg_lock); if (_mv88e6xxx_port_fdb_load(chip, port, fdb->addr, fdb->vid, state)) netdev_err(ds->ports[port].netdev, @@ -2434,9 +2387,6 @@ static int mv88e6xxx_port_fdb_del(struct dsa_switch *ds, int port, struct mv88e6xxx_chip *chip = ds_to_priv(ds); int ret; - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_ATU)) - return -EOPNOTSUPP; - mutex_lock(&chip->reg_lock); ret = _mv88e6xxx_port_fdb_load(chip, port, fdb->addr, fdb->vid, GLOBAL_ATU_DATA_STATE_UNUSED); @@ -2542,9 +2492,6 @@ static int mv88e6xxx_port_fdb_dump(struct dsa_switch *ds, int port, u16 fid; int err; - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_ATU)) - return -EOPNOTSUPP; - mutex_lock(&chip->reg_lock); /* Dump port's default Filtering Information Database (VLAN ID 0) */ @@ -2587,9 +2534,6 @@ static int mv88e6xxx_port_bridge_join(struct dsa_switch *ds, int port, struct mv88e6xxx_chip *chip = ds_to_priv(ds); int i, err = 0; - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VLANTABLE)) - return -EOPNOTSUPP; - mutex_lock(&chip->reg_lock); /* Assign the bridge and remap each port's VLANTable */ @@ -2614,9 +2558,6 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port) struct net_device *bridge = chip->ports[port].bridge_dev; int i; - if (!mv88e6xxx_has(chip, MV88E6XXX_FLAG_VLANTABLE)) - return; - mutex_lock(&chip->reg_lock); /* Unassign the bridge and remap each port's VLANTable */ @@ -3016,13 +2957,70 @@ static int mv88e6xxx_setup_port(struct mv88e6xxx_chip *chip, int port) return 0; } -static int mv88e6xxx_setup_global(struct mv88e6xxx_chip *chip) +static int mv88e6xxx_g1_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) +{ + int err; + + err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_01, + (addr[0] << 8) | addr[1]); + if (err) + return err; + + err 
= mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_23, + (addr[2] << 8) | addr[3]); + if (err) + return err; + + return mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_MAC_45, + (addr[4] << 8) | addr[5]); +} + +static int mv88e6xxx_g1_set_age_time(struct mv88e6xxx_chip *chip, + unsigned int msecs) +{ + const unsigned int coeff = chip->info->age_time_coeff; + const unsigned int min = 0x01 * coeff; + const unsigned int max = 0xff * coeff; + u8 age_time; + u16 val; + int err; + + if (msecs < min || msecs > max) + return -ERANGE; + + /* Round to nearest multiple of coeff */ + age_time = (msecs + coeff / 2) / coeff; + + err = mv88e6xxx_read(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, &val); + if (err) + return err; + + /* AgeTime is 11:4 bits */ + val &= ~0xff0; + val |= age_time << 4; + + return mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, val); +} + +static int mv88e6xxx_set_ageing_time(struct dsa_switch *ds, + unsigned int ageing_time) +{ + struct mv88e6xxx_chip *chip = ds_to_priv(ds); + int err; + + mutex_lock(&chip->reg_lock); + err = mv88e6xxx_g1_set_age_time(chip, ageing_time); + mutex_unlock(&chip->reg_lock); + + return err; +} + +static int mv88e6xxx_g1_setup(struct mv88e6xxx_chip *chip) { struct dsa_switch *ds = chip->ds; u32 upstream_port = dsa_upstream_port(ds); u16 reg; int err; - int i; /* Enable the PHY Polling Unit if present, don't discard any packets, * and mask all interrupt sources. @@ -3054,12 +3052,26 @@ static int mv88e6xxx_setup_global(struct mv88e6xxx_chip *chip) if (err) return err; + /* Clear all the VTU and STU entries */ + err = _mv88e6xxx_vtu_stu_flush(chip); + if (err < 0) + return err; + /* Set the default address aging time to 5 minutes, and * enable address learn messages to be sent to all message * ports. 
*/ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, - 0x0140 | GLOBAL_ATU_CONTROL_LEARN2ALL); + err = mv88e6xxx_write(chip, REG_GLOBAL, GLOBAL_ATU_CONTROL, + GLOBAL_ATU_CONTROL_LEARN2ALL); + if (err) + return err; + + err = mv88e6xxx_g1_set_age_time(chip, 300000); + if (err) + return err; + + /* Clear all ATU entries */ + err = _mv88e6xxx_atu_flush(chip, 0, true); if (err) return err; @@ -3094,131 +3106,234 @@ static int mv88e6xxx_setup_global(struct mv88e6xxx_chip *chip) if (err) return err; - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:0x to the CPU port. - */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, - 0xffff); + /* Clear the statistics counters for all ports */ + err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP, + GLOBAL_STATS_OP_FLUSH_ALL); if (err) return err; - /* Ignore removed tag data on doubly tagged packets, disable - * flow control messages, force flow control priority to the - * highest, and send all special multicast frames to the CPU - * port at the highest priority. - */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, - 0x7 | GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x70 | - GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI); + /* Wait for the flush to complete. */ + err = _mv88e6xxx_stats_wait(chip); if (err) return err; - /* Program the DSA routing table. 
*/ - for (i = 0; i < 32; i++) { - int nexthop = 0x1f; + return 0; +} - if (i != ds->index && i < DSA_MAX_SWITCHES) - nexthop = ds->rtable[i] & 0x1f; +static int mv88e6xxx_g2_device_mapping_write(struct mv88e6xxx_chip *chip, + int target, int port) +{ + u16 val = (target << 8) | (port & 0xf); + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_DEVICE_MAPPING, val); +} + +static int mv88e6xxx_g2_set_device_mapping(struct mv88e6xxx_chip *chip) +{ + int target, port; + int err; - err = _mv88e6xxx_reg_write( - chip, REG_GLOBAL2, - GLOBAL2_DEVICE_MAPPING, - GLOBAL2_DEVICE_MAPPING_UPDATE | - (i << GLOBAL2_DEVICE_MAPPING_TARGET_SHIFT) | nexthop); + /* Initialize the routing port to the 32 possible target devices */ + for (target = 0; target < 32; ++target) { + port = 0xf; + + if (target < DSA_MAX_SWITCHES) { + port = chip->ds->rtable[target]; + if (port == DSA_RTABLE_NONE) + port = 0xf; + } + + err = mv88e6xxx_g2_device_mapping_write(chip, target, port); if (err) - return err; + break; } - /* Clear all trunk masks. 
*/ - for (i = 0; i < 8; i++) { - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, - GLOBAL2_TRUNK_MASK, - 0x8000 | - (i << GLOBAL2_TRUNK_MASK_NUM_SHIFT) | - ((1 << chip->info->num_ports) - 1)); + return err; +} + +static int mv88e6xxx_g2_trunk_mask_write(struct mv88e6xxx_chip *chip, int num, + bool hask, u16 mask) +{ + const u16 port_mask = BIT(chip->info->num_ports) - 1; + u16 val = (num << 12) | (mask & port_mask); + + if (hask) + val |= GLOBAL2_TRUNK_MASK_HASK; + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MASK, val); +} + +static int mv88e6xxx_g2_trunk_mapping_write(struct mv88e6xxx_chip *chip, int id, + u16 map) +{ + const u16 port_mask = BIT(chip->info->num_ports) - 1; + u16 val = (id << 11) | (map & port_mask); + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_TRUNK_MAPPING, val); +} + +static int mv88e6xxx_g2_clear_trunk(struct mv88e6xxx_chip *chip) +{ + const u16 port_mask = BIT(chip->info->num_ports) - 1; + int i, err; + + /* Clear all eight possible Trunk Mask vectors */ + for (i = 0; i < 8; ++i) { + err = mv88e6xxx_g2_trunk_mask_write(chip, i, false, port_mask); if (err) return err; } - /* Clear all trunk mappings. */ - for (i = 0; i < 16; i++) { - err = _mv88e6xxx_reg_write( - chip, REG_GLOBAL2, - GLOBAL2_TRUNK_MAPPING, - GLOBAL2_TRUNK_MAPPING_UPDATE | - (i << GLOBAL2_TRUNK_MAPPING_ID_SHIFT)); + /* Clear all sixteen possible Trunk ID routing vectors */ + for (i = 0; i < 16; ++i) { + err = mv88e6xxx_g2_trunk_mapping_write(chip, i, 0); if (err) return err; } - if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || - mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || - mv88e6xxx_6320_family(chip)) { - /* Send all frames with destination addresses matching - * 01:80:c2:00:00:2x to the CPU port. 
- */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, - GLOBAL2_MGMT_EN_2X, 0xffff); + return 0; +} + +static int mv88e6xxx_g2_clear_irl(struct mv88e6xxx_chip *chip) +{ + int port, err; + + /* Init all Ingress Rate Limit resources of all ports */ + for (port = 0; port < chip->info->num_ports; ++port) { + /* XXX newer chips (like 88E6390) have different 2-bit ops */ + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, + GLOBAL2_IRL_CMD_OP_INIT_ALL | + (port << 8)); if (err) - return err; + break; + + /* Wait for the operation to complete */ + err = _mv88e6xxx_wait(chip, REG_GLOBAL2, GLOBAL2_IRL_CMD, + GLOBAL2_IRL_CMD_BUSY); + if (err) + break; + } + + return err; +} + +/* Indirect write to the Switch MAC/WoL/WoF register */ +static int mv88e6xxx_g2_switch_mac_write(struct mv88e6xxx_chip *chip, + unsigned int pointer, u8 data) +{ + u16 val = (pointer << 8) | data; + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MAC, val); +} + +static int mv88e6xxx_g2_set_switch_mac(struct mv88e6xxx_chip *chip, u8 *addr) +{ + int i, err; - /* Initialise cross-chip port VLAN table to reset - * defaults. + for (i = 0; i < 6; i++) { + err = mv88e6xxx_g2_switch_mac_write(chip, i, addr[i]); + if (err) + break; + } + + return err; +} + +static int mv88e6xxx_g2_pot_write(struct mv88e6xxx_chip *chip, int pointer, + u8 data) +{ + u16 val = (pointer << 8) | (data & 0x7); + + return mv88e6xxx_update(chip, REG_GLOBAL2, GLOBAL2_PRIO_OVERRIDE, val); +} + +static int mv88e6xxx_g2_clear_pot(struct mv88e6xxx_chip *chip) +{ + int i, err; + + /* Clear all sixteen possible Priority Override entries */ + for (i = 0; i < 16; i++) { + err = mv88e6xxx_g2_pot_write(chip, i, 0); + if (err) + break; + } + + return err; +} + +static int mv88e6xxx_g2_setup(struct mv88e6xxx_chip *chip) +{ + u16 reg; + int err; + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) { + /* Consider the frames with reserved multicast destination + * addresses matching 01:80:c2:00:00:2x as MGMT. 
*/ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, - GLOBAL2_PVT_ADDR, 0x9000); + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_2X, + 0xffff); if (err) return err; - - /* Clear the priority override table. */ - for (i = 0; i < 16; i++) { - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, - GLOBAL2_PRIO_OVERRIDE, - 0x8000 | (i << 8)); - if (err) - return err; - } } - if (mv88e6xxx_6352_family(chip) || mv88e6xxx_6351_family(chip) || - mv88e6xxx_6165_family(chip) || mv88e6xxx_6097_family(chip) || - mv88e6xxx_6185_family(chip) || mv88e6xxx_6095_family(chip) || - mv88e6xxx_6320_family(chip)) { - /* Disable ingress rate limiting by resetting all - * ingress rate limit registers to their initial - * state. + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X)) { + /* Consider the frames with reserved multicast destination + * addresses matching 01:80:c2:00:00:0x as MGMT. */ - for (i = 0; i < chip->info->num_ports; i++) { - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL2, - GLOBAL2_INGRESS_OP, - 0x9000 | (i << 8)); - if (err) - return err; - } + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_MGMT_EN_0X, + 0xffff); + if (err) + return err; } - /* Clear the statistics counters for all ports */ - err = _mv88e6xxx_reg_write(chip, REG_GLOBAL, GLOBAL_STATS_OP, - GLOBAL_STATS_OP_FLUSH_ALL); + /* Ignore removed tag data on doubly tagged packets, disable + * flow control messages, force flow control priority to the + * highest, and send all special multicast frames to the CPU + * port at the highest priority. + */ + reg = GLOBAL2_SWITCH_MGMT_FORCE_FLOW_CTRL_PRI | (0x7 << 4); + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_0X) || + mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_MGMT_EN_2X)) + reg |= GLOBAL2_SWITCH_MGMT_RSVD2CPU | 0x7; + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_SWITCH_MGMT, reg); if (err) return err; - /* Wait for the flush to complete. */ - err = _mv88e6xxx_stats_wait(chip); + /* Program the DSA routing table. 
*/ + err = mv88e6xxx_g2_set_device_mapping(chip); if (err) return err; - /* Clear all ATU entries */ - err = _mv88e6xxx_atu_flush(chip, 0, true); + /* Clear all trunk masks and mapping. */ + err = mv88e6xxx_g2_clear_trunk(chip); if (err) return err; - /* Clear all the VTU and STU entries */ - err = _mv88e6xxx_vtu_stu_flush(chip); - if (err < 0) - return err; + if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_IRL)) { + /* Disable ingress rate limiting by resetting all per port + * ingress rate limit resources to their initial state. + */ + err = mv88e6xxx_g2_clear_irl(chip); + if (err) + return err; + } - return err; + if (mv88e6xxx_has(chip, MV88E6XXX_FLAGS_PVT)) { + /* Initialize Cross-chip Port VLAN Table to reset defaults */ + err = mv88e6xxx_write(chip, REG_GLOBAL2, GLOBAL2_PVT_ADDR, + GLOBAL2_PVT_ADDR_OP_INIT_ONES); + if (err) + return err; + } + + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_POT)) { + /* Clear the priority override table. */ + err = mv88e6xxx_g2_clear_pot(chip); + if (err) + return err; + } + + return 0; } static int mv88e6xxx_setup(struct dsa_switch *ds) @@ -3239,12 +3354,21 @@ static int mv88e6xxx_setup(struct dsa_switch *ds) if (err) goto unlock; - err = mv88e6xxx_setup_global(chip); + /* Setup Switch Port Registers */ + for (i = 0; i < chip->info->num_ports; i++) { + err = mv88e6xxx_setup_port(chip, i); + if (err) + goto unlock; + } + + /* Setup Switch Global 1 Registers */ + err = mv88e6xxx_g1_setup(chip); if (err) goto unlock; - for (i = 0; i < chip->info->num_ports; i++) { - err = mv88e6xxx_setup_port(chip, i); + /* Setup Switch Global 2 Registers */ + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_GLOBAL2)) { + err = mv88e6xxx_g2_setup(chip); if (err) goto unlock; } @@ -3255,6 +3379,24 @@ unlock: return err; } +static int mv88e6xxx_set_addr(struct dsa_switch *ds, u8 *addr) +{ + struct mv88e6xxx_chip *chip = ds_to_priv(ds); + int err; + + mutex_lock(&chip->reg_lock); + + /* Has an indirect Switch MAC/WoL/WoF register in Global 2? 
*/ + if (mv88e6xxx_has(chip, MV88E6XXX_FLAG_G2_SWITCH_MAC)) + err = mv88e6xxx_g2_set_switch_mac(chip, addr); + else + err = mv88e6xxx_g1_set_switch_mac(chip, addr); + + mutex_unlock(&chip->reg_lock); + + return err; +} + static int mv88e6xxx_mdio_page_read(struct dsa_switch *ds, int port, int page, int reg) { @@ -3536,6 +3678,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 10, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6097, }, @@ -3546,6 +3689,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 256, .num_ports = 11, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6095, }, @@ -3556,6 +3700,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 3, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, }, @@ -3566,6 +3711,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 256, .num_ports = 8, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6185, }, @@ -3576,6 +3722,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 6, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, }, @@ -3586,6 +3733,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 6, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6165, }, @@ -3596,6 +3744,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, }, @@ -3606,6 +3755,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, }, @@ 
-3616,6 +3766,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, }, @@ -3626,6 +3777,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, }, @@ -3636,6 +3788,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 256, .num_ports = 10, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6185, }, @@ -3646,6 +3799,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, }, @@ -3656,6 +3810,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6320, }, @@ -3666,6 +3821,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6320, }, @@ -3676,6 +3832,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, }, @@ -3686,6 +3843,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6351, }, @@ -3696,6 +3854,7 @@ static const struct mv88e6xxx_info mv88e6xxx_table[] = { .num_databases = 4096, .num_ports = 7, .port_base_addr = 0x10, + .age_time_coeff = 15000, .flags = MV88E6XXX_FLAGS_FAMILY_6352, }, }; @@ -3834,6 +3993,7 @@ static struct dsa_switch_driver mv88e6xxx_switch_driver = { .set_eeprom = 
mv88e6xxx_set_eeprom, .get_regs_len = mv88e6xxx_get_regs_len, .get_regs = mv88e6xxx_get_regs, + .set_ageing_time = mv88e6xxx_set_ageing_time, .port_bridge_join = mv88e6xxx_port_bridge_join, .port_bridge_leave = mv88e6xxx_port_bridge_leave, .port_stp_state_set = mv88e6xxx_port_stp_state_set, diff --git a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h index 83f0662..899ca1d 100644 --- a/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h +++ b/drivers/net/dsa/mv88e6xxx/mv88e6xxx.h @@ -294,15 +294,24 @@ #define GLOBAL2_TRUNK_MASK 0x07 #define GLOBAL2_TRUNK_MASK_UPDATE BIT(15) #define GLOBAL2_TRUNK_MASK_NUM_SHIFT 12 +#define GLOBAL2_TRUNK_MASK_HASK BIT(11) #define GLOBAL2_TRUNK_MAPPING 0x08 #define GLOBAL2_TRUNK_MAPPING_UPDATE BIT(15) #define GLOBAL2_TRUNK_MAPPING_ID_SHIFT 11 -#define GLOBAL2_INGRESS_OP 0x09 -#define GLOBAL2_INGRESS_DATA 0x0a +#define GLOBAL2_IRL_CMD 0x09 +#define GLOBAL2_IRL_CMD_BUSY BIT(15) +#define GLOBAL2_IRL_CMD_OP_INIT_ALL ((0x001 << 12) | GLOBAL2_IRL_CMD_BUSY) +#define GLOBAL2_IRL_CMD_OP_INIT_SEL ((0x010 << 12) | GLOBAL2_IRL_CMD_BUSY) +#define GLOBAL2_IRL_CMD_OP_WRITE_SEL ((0x011 << 12) | GLOBAL2_IRL_CMD_BUSY) +#define GLOBAL2_IRL_CMD_OP_READ_SEL ((0x100 << 12) | GLOBAL2_IRL_CMD_BUSY) +#define GLOBAL2_IRL_DATA 0x0a #define GLOBAL2_PVT_ADDR 0x0b +#define GLOBAL2_PVT_ADDR_BUSY BIT(15) +#define GLOBAL2_PVT_ADDR_OP_INIT_ONES ((0x01 << 12) | GLOBAL2_PVT_ADDR_BUSY) +#define GLOBAL2_PVT_ADDR_OP_WRITE_PVLAN ((0x03 << 12) | GLOBAL2_PVT_ADDR_BUSY) +#define GLOBAL2_PVT_ADDR_OP_READ ((0x04 << 12) | GLOBAL2_PVT_ADDR_BUSY) #define GLOBAL2_PVT_DATA 0x0c #define GLOBAL2_SWITCH_MAC 0x0d -#define GLOBAL2_SWITCH_MAC_BUSY BIT(15) #define GLOBAL2_ATU_STATS 0x0e #define GLOBAL2_PRIO_OVERRIDE 0x0f #define GLOBAL2_PRIO_OVERRIDE_FORCE_SNOOP BIT(7) @@ -374,11 +383,6 @@ enum mv88e6xxx_family { }; enum mv88e6xxx_cap { - /* Address Translation Unit. - * The ATU is used to lookup and learn MAC addresses. See GLOBAL_ATU_OP. 
- */ - MV88E6XXX_CAP_ATU, - /* Energy Efficient Ethernet. */ MV88E6XXX_CAP_EEE, @@ -388,17 +392,25 @@ enum mv88e6xxx_cap { */ MV88E6XXX_CAP_EEPROM, + /* Switch Global 2 Registers. + * The device contains a second set of global 16-bit registers. + */ + MV88E6XXX_CAP_GLOBAL2, + MV88E6XXX_CAP_G2_MGMT_EN_2X, /* (0x02) MGMT Enable Register 2x */ + MV88E6XXX_CAP_G2_MGMT_EN_0X, /* (0x03) MGMT Enable Register 0x */ + MV88E6XXX_CAP_G2_IRL_CMD, /* (0x09) Ingress Rate Command */ + MV88E6XXX_CAP_G2_IRL_DATA, /* (0x0a) Ingress Rate Data */ + MV88E6XXX_CAP_G2_PVT_ADDR, /* (0x0b) Cross Chip Port VLAN Addr */ + MV88E6XXX_CAP_G2_PVT_DATA, /* (0x0c) Cross Chip Port VLAN Data */ + MV88E6XXX_CAP_G2_SWITCH_MAC, /* (0x0d) Switch MAC/WoL/WoF */ + MV88E6XXX_CAP_G2_POT, /* (0x0f) Priority Override Table */ + /* Multi-chip Addressing Mode. * Some chips require an indirect SMI access when their SMI device * address is not zero. See SMI_CMD and SMI_DATA. */ MV88E6XXX_CAP_MULTI_CHIP, - /* Port State Filtering for 802.1D Spanning Tree. - * See PORT_CONTROL_STATE_* values in the PORT_CONTROL register. - */ - MV88E6XXX_CAP_PORTSTATE, - /* PHY Polling Unit. * See GLOBAL_CONTROL_PPU_ENABLE and GLOBAL_STATUS_PPU_POLLING. */ @@ -417,25 +429,12 @@ enum mv88e6xxx_cap { */ MV88E6XXX_CAP_STU, - /* Switch MAC/WoL/WoF register. - * This requires an indirect access to set the switch MAC address - * through GLOBAL2_SWITCH_MAC, otherwise GLOBAL_MAC_01, GLOBAL_MAC_23, - * and GLOBAL_MAC_45 are used with a direct access. - */ - MV88E6XXX_CAP_SWITCH_MAC_WOL_WOF, - /* Internal temperature sensor. * Available from any enabled port's PHY register 26, page 6. */ MV88E6XXX_CAP_TEMP, MV88E6XXX_CAP_TEMP_LIMIT, - /* In-chip Port Based VLANs. - * Each port VLANTable register (see PORT_BASE_VLAN) is used to restrict - * the output (or egress) ports to which it is allowed to send frames. - */ - MV88E6XXX_CAP_VLANTABLE, - /* VLAN Table Unit. * The VTU is used to program 802.1Q VLANs. See GLOBAL_VTU_OP. 
*/ @@ -443,90 +442,124 @@ enum mv88e6xxx_cap { }; /* Bitmask of capabilities */ -#define MV88E6XXX_FLAG_ATU BIT(MV88E6XXX_CAP_ATU) #define MV88E6XXX_FLAG_EEE BIT(MV88E6XXX_CAP_EEE) #define MV88E6XXX_FLAG_EEPROM BIT(MV88E6XXX_CAP_EEPROM) +#define MV88E6XXX_FLAG_GLOBAL2 BIT(MV88E6XXX_CAP_GLOBAL2) +#define MV88E6XXX_FLAG_G2_MGMT_EN_2X BIT(MV88E6XXX_CAP_G2_MGMT_EN_2X) +#define MV88E6XXX_FLAG_G2_MGMT_EN_0X BIT(MV88E6XXX_CAP_G2_MGMT_EN_0X) +#define MV88E6XXX_FLAG_G2_IRL_CMD BIT(MV88E6XXX_CAP_G2_IRL_CMD) +#define MV88E6XXX_FLAG_G2_IRL_DATA BIT(MV88E6XXX_CAP_G2_IRL_DATA) +#define MV88E6XXX_FLAG_G2_PVT_ADDR BIT(MV88E6XXX_CAP_G2_PVT_ADDR) +#define MV88E6XXX_FLAG_G2_PVT_DATA BIT(MV88E6XXX_CAP_G2_PVT_DATA) +#define MV88E6XXX_FLAG_G2_SWITCH_MAC BIT(MV88E6XXX_CAP_G2_SWITCH_MAC) +#define MV88E6XXX_FLAG_G2_POT BIT(MV88E6XXX_CAP_G2_POT) #define MV88E6XXX_FLAG_MULTI_CHIP BIT(MV88E6XXX_CAP_MULTI_CHIP) -#define MV88E6XXX_FLAG_PORTSTATE BIT(MV88E6XXX_CAP_PORTSTATE) #define MV88E6XXX_FLAG_PPU BIT(MV88E6XXX_CAP_PPU) #define MV88E6XXX_FLAG_PPU_ACTIVE BIT(MV88E6XXX_CAP_PPU_ACTIVE) #define MV88E6XXX_FLAG_SMI_PHY BIT(MV88E6XXX_CAP_SMI_PHY) #define MV88E6XXX_FLAG_STU BIT(MV88E6XXX_CAP_STU) -#define MV88E6XXX_FLAG_SWITCH_MAC BIT(MV88E6XXX_CAP_SWITCH_MAC_WOL_WOF) #define MV88E6XXX_FLAG_TEMP BIT(MV88E6XXX_CAP_TEMP) #define MV88E6XXX_FLAG_TEMP_LIMIT BIT(MV88E6XXX_CAP_TEMP_LIMIT) -#define MV88E6XXX_FLAG_VLANTABLE BIT(MV88E6XXX_CAP_VLANTABLE) #define MV88E6XXX_FLAG_VTU BIT(MV88E6XXX_CAP_VTU) +/* Ingress Rate Limit unit */ +#define MV88E6XXX_FLAGS_IRL \ + (MV88E6XXX_FLAG_G2_IRL_CMD | \ + MV88E6XXX_FLAG_G2_IRL_DATA) + +/* Cross-chip Port VLAN Table */ +#define MV88E6XXX_FLAGS_PVT \ + (MV88E6XXX_FLAG_G2_PVT_ADDR | \ + MV88E6XXX_FLAG_G2_PVT_DATA) + #define MV88E6XXX_FLAGS_FAMILY_6095 \ - (MV88E6XXX_FLAG_ATU | \ + (MV88E6XXX_FLAG_GLOBAL2 | \ + MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU | \ - MV88E6XXX_FLAG_VLANTABLE | \ MV88E6XXX_FLAG_VTU) #define 
MV88E6XXX_FLAGS_FAMILY_6097 \ - (MV88E6XXX_FLAG_ATU | \ + (MV88E6XXX_FLAG_GLOBAL2 | \ + MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ + MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ + MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU | \ MV88E6XXX_FLAG_STU | \ - MV88E6XXX_FLAG_VLANTABLE | \ - MV88E6XXX_FLAG_VTU) + MV88E6XXX_FLAG_VTU | \ + MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6165 \ - (MV88E6XXX_FLAG_MULTI_CHIP | \ + (MV88E6XXX_FLAG_GLOBAL2 | \ + MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ + MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ + MV88E6XXX_FLAG_G2_SWITCH_MAC | \ + MV88E6XXX_FLAG_G2_POT | \ + MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_STU | \ - MV88E6XXX_FLAG_SWITCH_MAC | \ MV88E6XXX_FLAG_TEMP | \ - MV88E6XXX_FLAG_VTU) + MV88E6XXX_FLAG_VTU | \ + MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6185 \ - (MV88E6XXX_FLAG_ATU | \ + (MV88E6XXX_FLAG_GLOBAL2 | \ + MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ MV88E6XXX_FLAG_MULTI_CHIP | \ MV88E6XXX_FLAG_PPU | \ - MV88E6XXX_FLAG_VLANTABLE | \ MV88E6XXX_FLAG_VTU) #define MV88E6XXX_FLAGS_FAMILY_6320 \ - (MV88E6XXX_FLAG_ATU | \ - MV88E6XXX_FLAG_EEE | \ + (MV88E6XXX_FLAG_EEE | \ MV88E6XXX_FLAG_EEPROM | \ + MV88E6XXX_FLAG_GLOBAL2 | \ + MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ + MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ + MV88E6XXX_FLAG_G2_SWITCH_MAC | \ + MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_MULTI_CHIP | \ - MV88E6XXX_FLAG_PORTSTATE | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ MV88E6XXX_FLAG_SMI_PHY | \ - MV88E6XXX_FLAG_SWITCH_MAC | \ MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_TEMP_LIMIT | \ - MV88E6XXX_FLAG_VLANTABLE | \ - MV88E6XXX_FLAG_VTU) + MV88E6XXX_FLAG_VTU | \ + MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6351 \ - (MV88E6XXX_FLAG_ATU | \ + (MV88E6XXX_FLAG_GLOBAL2 | \ + MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ + MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ + MV88E6XXX_FLAG_G2_SWITCH_MAC | \ + MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_MULTI_CHIP | \ - MV88E6XXX_FLAG_PORTSTATE | \ 
MV88E6XXX_FLAG_PPU_ACTIVE | \ MV88E6XXX_FLAG_SMI_PHY | \ MV88E6XXX_FLAG_STU | \ - MV88E6XXX_FLAG_SWITCH_MAC | \ MV88E6XXX_FLAG_TEMP | \ - MV88E6XXX_FLAG_VLANTABLE | \ - MV88E6XXX_FLAG_VTU) + MV88E6XXX_FLAG_VTU | \ + MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_PVT) #define MV88E6XXX_FLAGS_FAMILY_6352 \ - (MV88E6XXX_FLAG_ATU | \ - MV88E6XXX_FLAG_EEE | \ + (MV88E6XXX_FLAG_EEE | \ MV88E6XXX_FLAG_EEPROM | \ + MV88E6XXX_FLAG_GLOBAL2 | \ + MV88E6XXX_FLAG_G2_MGMT_EN_2X | \ + MV88E6XXX_FLAG_G2_MGMT_EN_0X | \ + MV88E6XXX_FLAG_G2_SWITCH_MAC | \ + MV88E6XXX_FLAG_G2_POT | \ MV88E6XXX_FLAG_MULTI_CHIP | \ - MV88E6XXX_FLAG_PORTSTATE | \ MV88E6XXX_FLAG_PPU_ACTIVE | \ MV88E6XXX_FLAG_SMI_PHY | \ MV88E6XXX_FLAG_STU | \ - MV88E6XXX_FLAG_SWITCH_MAC | \ MV88E6XXX_FLAG_TEMP | \ MV88E6XXX_FLAG_TEMP_LIMIT | \ - MV88E6XXX_FLAG_VLANTABLE | \ - MV88E6XXX_FLAG_VTU) + MV88E6XXX_FLAG_VTU | \ + MV88E6XXX_FLAGS_IRL | \ + MV88E6XXX_FLAGS_PVT) struct mv88e6xxx_info { enum mv88e6xxx_family family; @@ -535,6 +568,7 @@ struct mv88e6xxx_info { unsigned int num_databases; unsigned int num_ports; unsigned int port_base_addr; + unsigned int age_time_coeff; unsigned long flags; }; diff --git a/drivers/net/ethernet/adi/bfin_mac.c b/drivers/net/ethernet/adi/bfin_mac.c index 3d2245f..38eaea1 100644 --- a/drivers/net/ethernet/adi/bfin_mac.c +++ b/drivers/net/ethernet/adi/bfin_mac.c @@ -310,7 +310,7 @@ static int bfin_mdiobus_write(struct mii_bus *bus, int phy_addr, int regnum, static void bfin_mac_adjust_link(struct net_device *dev) { struct bfin_mac_local *lp = netdev_priv(dev); - struct phy_device *phydev = lp->phydev; + struct phy_device *phydev = dev->phydev; unsigned long flags; int new_state = 0; @@ -430,7 +430,6 @@ static int mii_probe(struct net_device *dev, int phy_mode) lp->old_link = 0; lp->old_speed = 0; lp->old_duplex = -1; - lp->phydev = phydev; phy_attached_print(phydev, "mdc_clk=%dHz(mdc_div=%d)@sclk=%dMHz)\n", MDC_CLK, mdc_div, sclk / 1000000); @@ -450,31 +449,6 @@ static irqreturn_t 
bfin_mac_wake_interrupt(int irq, void *dev_id) return IRQ_HANDLED; } -static int -bfin_mac_ethtool_getsettings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct bfin_mac_local *lp = netdev_priv(dev); - - if (lp->phydev) - return phy_ethtool_gset(lp->phydev, cmd); - - return -EINVAL; -} - -static int -bfin_mac_ethtool_setsettings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct bfin_mac_local *lp = netdev_priv(dev); - - if (!capable(CAP_NET_ADMIN)) - return -EPERM; - - if (lp->phydev) - return phy_ethtool_sset(lp->phydev, cmd); - - return -EINVAL; -} - static void bfin_mac_ethtool_getdrvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { @@ -552,8 +526,6 @@ static int bfin_mac_ethtool_get_ts_info(struct net_device *dev, #endif static const struct ethtool_ops bfin_mac_ethtool_ops = { - .get_settings = bfin_mac_ethtool_getsettings, - .set_settings = bfin_mac_ethtool_setsettings, .get_link = ethtool_op_get_link, .get_drvinfo = bfin_mac_ethtool_getdrvinfo, .get_wol = bfin_mac_ethtool_getwol, @@ -561,6 +533,8 @@ static const struct ethtool_ops bfin_mac_ethtool_ops = { #ifdef CONFIG_BFIN_MAC_USE_HWSTAMP .get_ts_info = bfin_mac_ethtool_get_ts_info, #endif + .get_link_ksettings = phy_ethtool_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, }; /**************************************************************************/ @@ -1427,7 +1401,7 @@ static void bfin_mac_timeout(struct net_device *dev) if (netif_queue_stopped(dev)) netif_wake_queue(dev); - bfin_mac_enable(lp->phydev); + bfin_mac_enable(dev->phydev); /* We can accept TX packets again */ netif_trans_update(dev); /* prevent tx timeout */ @@ -1491,8 +1465,6 @@ static void bfin_mac_set_multicast_list(struct net_device *dev) static int bfin_mac_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { - struct bfin_mac_local *lp = netdev_priv(netdev); - if (!netif_running(netdev)) return -EINVAL; @@ -1502,8 +1474,8 @@ static int bfin_mac_ioctl(struct net_device 
*netdev, struct ifreq *ifr, int cmd) case SIOCGHWTSTAMP: return bfin_mac_hwtstamp_get(netdev, ifr); default: - if (lp->phydev) - return phy_mii_ioctl(lp->phydev, ifr, cmd); + if (netdev->phydev) + return phy_mii_ioctl(netdev->phydev, ifr, cmd); else return -EOPNOTSUPP; } @@ -1547,12 +1519,12 @@ static int bfin_mac_open(struct net_device *dev) if (ret) return ret; - phy_start(lp->phydev); + phy_start(dev->phydev); setup_system_regs(dev); setup_mac_addr(dev->dev_addr); bfin_mac_disable(); - ret = bfin_mac_enable(lp->phydev); + ret = bfin_mac_enable(dev->phydev); if (ret) return ret; pr_debug("hardware init finished\n"); @@ -1578,8 +1550,8 @@ static int bfin_mac_close(struct net_device *dev) napi_disable(&lp->napi); netif_carrier_off(dev); - phy_stop(lp->phydev); - phy_write(lp->phydev, MII_BMCR, BMCR_PDOWN); + phy_stop(dev->phydev); + phy_write(dev->phydev, MII_BMCR, BMCR_PDOWN); /* clear everything */ bfin_mac_shutdown(dev); diff --git a/drivers/net/ethernet/adi/bfin_mac.h b/drivers/net/ethernet/adi/bfin_mac.h index d1217db..8c3b561 100644 --- a/drivers/net/ethernet/adi/bfin_mac.h +++ b/drivers/net/ethernet/adi/bfin_mac.h @@ -92,7 +92,6 @@ struct bfin_mac_local { int old_speed; int old_duplex; - struct phy_device *phydev; struct mii_bus *mii_bus; #if defined(CONFIG_BFIN_MAC_USE_HWSTAMP) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 659faa6..8a0165b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -73,6 +73,7 @@ enum board_idx { BCM57301, BCM57302, BCM57304, + BCM58700, BCM57311, BCM57312, BCM57402, @@ -98,6 +99,7 @@ static const struct { { "Broadcom BCM57301 NetXtreme-C Single-port 10Gb Ethernet" }, { "Broadcom BCM57302 NetXtreme-C Dual-port 10Gb/25Gb Ethernet" }, { "Broadcom BCM57304 NetXtreme-C Dual-port 10Gb/25Gb/40Gb/50Gb Ethernet" }, + { "Broadcom BCM58700 Nitro 4-port 1Gb/2.5Gb/10Gb Ethernet" }, { "Broadcom BCM57311 NetXtreme-C Single-port 10Gb 
Ethernet" }, { "Broadcom BCM57312 NetXtreme-C Dual-port 10Gb/25Gb Ethernet" }, { "Broadcom BCM57402 NetXtreme-E Dual-port 10Gb Ethernet" }, @@ -120,6 +122,7 @@ static const struct pci_device_id bnxt_pci_tbl[] = { { PCI_VDEVICE(BROADCOM, 0x16c8), .driver_data = BCM57301 }, { PCI_VDEVICE(BROADCOM, 0x16c9), .driver_data = BCM57302 }, { PCI_VDEVICE(BROADCOM, 0x16ca), .driver_data = BCM57304 }, + { PCI_VDEVICE(BROADCOM, 0x16cd), .driver_data = BCM58700 }, { PCI_VDEVICE(BROADCOM, 0x16ce), .driver_data = BCM57311 }, { PCI_VDEVICE(BROADCOM, 0x16cf), .driver_data = BCM57312 }, { PCI_VDEVICE(BROADCOM, 0x16d0), .driver_data = BCM57402 }, @@ -1668,6 +1671,76 @@ static int bnxt_poll_work(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) return rx_pkts; } +static int bnxt_poll_nitroa0(struct napi_struct *napi, int budget) +{ + struct bnxt_napi *bnapi = container_of(napi, struct bnxt_napi, napi); + struct bnxt *bp = bnapi->bp; + struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring; + struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; + struct tx_cmp *txcmp; + struct rx_cmp_ext *rxcmp1; + u32 cp_cons, tmp_raw_cons; + u32 raw_cons = cpr->cp_raw_cons; + u32 rx_pkts = 0; + bool agg_event = false; + + while (1) { + int rc; + + cp_cons = RING_CMP(raw_cons); + txcmp = &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; + + if (!TX_CMP_VALID(txcmp, raw_cons)) + break; + + if ((TX_CMP_TYPE(txcmp) & 0x30) == 0x10) { + tmp_raw_cons = NEXT_RAW_CMP(raw_cons); + cp_cons = RING_CMP(tmp_raw_cons); + rxcmp1 = (struct rx_cmp_ext *) + &cpr->cp_desc_ring[CP_RING(cp_cons)][CP_IDX(cp_cons)]; + + if (!RX_CMP_VALID(rxcmp1, tmp_raw_cons)) + break; + + /* force an error to recycle the buffer */ + rxcmp1->rx_cmp_cfa_code_errors_v2 |= + cpu_to_le32(RX_CMPL_ERRORS_CRC_ERROR); + + rc = bnxt_rx_pkt(bp, bnapi, &raw_cons, &agg_event); + if (likely(rc == -EIO)) + rx_pkts++; + else if (rc == -EBUSY) /* partial completion */ + break; + } else if (unlikely(TX_CMP_TYPE(txcmp) == + CMPL_BASE_TYPE_HWRM_DONE)) { + 
bnxt_hwrm_handler(bp, txcmp); + } else { + netdev_err(bp->dev, + "Invalid completion received on special ring\n"); + } + raw_cons = NEXT_RAW_CMP(raw_cons); + + if (rx_pkts == budget) + break; + } + + cpr->cp_raw_cons = raw_cons; + BNXT_CP_DB(cpr->cp_doorbell, cpr->cp_raw_cons); + writel(DB_KEY_RX | rxr->rx_prod, rxr->rx_doorbell); + writel(DB_KEY_RX | rxr->rx_prod, rxr->rx_doorbell); + + if (agg_event) { + writel(DB_KEY_RX | rxr->rx_agg_prod, rxr->rx_agg_doorbell); + writel(DB_KEY_RX | rxr->rx_agg_prod, rxr->rx_agg_doorbell); + } + + if (!bnxt_has_work(bp, cpr) && rx_pkts < budget) { + napi_complete(napi); + BNXT_CP_DB_REARM(cpr->cp_doorbell, cpr->cp_raw_cons); + } + return rx_pkts; +} + static int bnxt_poll(struct napi_struct *napi, int budget) { struct bnxt_napi *bnapi = container_of(napi, struct bnxt_napi, napi); @@ -2340,6 +2413,9 @@ static int bnxt_alloc_vnics(struct bnxt *bp) num_vnics += bp->rx_nr_rings; #endif + if (BNXT_CHIP_TYPE_NITRO_A0(bp)) + num_vnics++; + bp->vnic_info = kcalloc(num_vnics, sizeof(struct bnxt_vnic_info), GFP_KERNEL); if (!bp->vnic_info) @@ -2357,7 +2433,8 @@ static void bnxt_init_vnics(struct bnxt *bp) struct bnxt_vnic_info *vnic = &bp->vnic_info[i]; vnic->fw_vnic_id = INVALID_HW_RING_ID; - vnic->fw_rss_cos_lb_ctx = INVALID_HW_RING_ID; + vnic->fw_rss_cos_lb_ctx[0] = INVALID_HW_RING_ID; + vnic->fw_rss_cos_lb_ctx[1] = INVALID_HW_RING_ID; vnic->fw_l2_ctx_id = INVALID_HW_RING_ID; if (bp->vnic_info[i].rss_hash_key) { @@ -2661,7 +2738,7 @@ static int bnxt_alloc_stats(struct bnxt *bp) cpr->hw_stats_ctx_id = INVALID_STATS_CTX_ID; } - if (BNXT_PF(bp)) { + if (BNXT_PF(bp) && bp->chip_num != CHIP_NUM_58700) { bp->hw_port_stats_size = sizeof(struct rx_port_stats) + sizeof(struct tx_port_stats) + 1024; @@ -3200,8 +3277,10 @@ static int bnxt_hwrm_set_vnic_filter(struct bnxt *bp, u16 vnic_id, u16 idx, struct hwrm_cfa_l2_filter_alloc_output *resp = bp->hwrm_cmd_resp_addr; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_CFA_L2_FILTER_ALLOC, -1, -1); - req.flags 
= cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX | - CFA_L2_FILTER_ALLOC_REQ_FLAGS_OUTERMOST); + req.flags = cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_PATH_RX); + if (!BNXT_CHIP_TYPE_NITRO_A0(bp)) + req.flags |= + cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_FLAGS_OUTERMOST); req.dst_id = cpu_to_le16(bp->vnic_info[vnic_id].fw_vnic_id); req.enables = cpu_to_le32(CFA_L2_FILTER_ALLOC_REQ_ENABLES_L2_ADDR | @@ -3308,7 +3387,7 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) struct bnxt_vnic_info *vnic = &bp->vnic_info[vnic_id]; struct hwrm_vnic_rss_cfg_input req = {0}; - if (vnic->fw_rss_cos_lb_ctx == INVALID_HW_RING_ID) + if (vnic->fw_rss_cos_lb_ctx[0] == INVALID_HW_RING_ID) return 0; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_CFG, -1, -1); @@ -3320,10 +3399,14 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) req.hash_type = cpu_to_le32(vnic->hash_type); - if (vnic->flags & BNXT_VNIC_RSS_FLAG) - max_rings = bp->rx_nr_rings; - else + if (vnic->flags & BNXT_VNIC_RSS_FLAG) { + if (BNXT_CHIP_TYPE_NITRO_A0(bp)) + max_rings = bp->rx_nr_rings - 1; + else + max_rings = bp->rx_nr_rings; + } else { max_rings = 1; + } /* Fill the RSS indirection table with ring group ids */ for (i = 0, j = 0; i < HW_HASH_INDEX_SIZE; i++, j++) { @@ -3336,7 +3419,7 @@ static int bnxt_hwrm_vnic_set_rss(struct bnxt *bp, u16 vnic_id, bool set_rss) req.hash_key_tbl_addr = cpu_to_le64(vnic->rss_hash_key_dma_addr); } - req.rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx); + req.rss_ctx_idx = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]); return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); } @@ -3359,32 +3442,35 @@ static int bnxt_hwrm_vnic_set_hds(struct bnxt *bp, u16 vnic_id) return hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); } -static void bnxt_hwrm_vnic_ctx_free_one(struct bnxt *bp, u16 vnic_id) +static void bnxt_hwrm_vnic_ctx_free_one(struct bnxt *bp, u16 vnic_id, + u16 ctx_idx) { struct 
hwrm_vnic_rss_cos_lb_ctx_free_input req = {0}; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_RSS_COS_LB_CTX_FREE, -1, -1); req.rss_cos_lb_ctx_id = - cpu_to_le16(bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx); + cpu_to_le16(bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx]); hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); - bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx = INVALID_HW_RING_ID; + bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx] = INVALID_HW_RING_ID; } static void bnxt_hwrm_vnic_ctx_free(struct bnxt *bp) { - int i; + int i, j; for (i = 0; i < bp->nr_vnics; i++) { struct bnxt_vnic_info *vnic = &bp->vnic_info[i]; - if (vnic->fw_rss_cos_lb_ctx != INVALID_HW_RING_ID) - bnxt_hwrm_vnic_ctx_free_one(bp, i); + for (j = 0; j < BNXT_MAX_CTX_PER_VNIC; j++) { + if (vnic->fw_rss_cos_lb_ctx[j] != INVALID_HW_RING_ID) + bnxt_hwrm_vnic_ctx_free_one(bp, i, j); + } } bp->rsscos_nr_ctxs = 0; } -static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, u16 vnic_id) +static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, u16 vnic_id, u16 ctx_idx) { int rc; struct hwrm_vnic_rss_cos_lb_ctx_alloc_input req = {0}; @@ -3397,7 +3483,7 @@ static int bnxt_hwrm_vnic_ctx_alloc(struct bnxt *bp, u16 vnic_id) mutex_lock(&bp->hwrm_cmd_lock); rc = _hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT); if (!rc) - bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx = + bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[ctx_idx] = le16_to_cpu(resp->rss_cos_lb_ctx_id); mutex_unlock(&bp->hwrm_cmd_lock); @@ -3412,16 +3498,31 @@ static int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id) u16 def_vlan = 0; bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_VNIC_CFG, -1, -1); + + req.enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP); /* Only RSS support for now TBD: COS & LB */ - req.enables = cpu_to_le32(VNIC_CFG_REQ_ENABLES_DFLT_RING_GRP | - VNIC_CFG_REQ_ENABLES_RSS_RULE | - VNIC_CFG_REQ_ENABLES_MRU); - req.rss_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx); - req.cos_rule = cpu_to_le16(0xffff); + if 
(vnic->fw_rss_cos_lb_ctx[0] != INVALID_HW_RING_ID) { + req.rss_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[0]); + req.enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_RSS_RULE | + VNIC_CFG_REQ_ENABLES_MRU); + } else { + req.rss_rule = cpu_to_le16(0xffff); + } + + if (BNXT_CHIP_TYPE_NITRO_A0(bp) && + (vnic->fw_rss_cos_lb_ctx[0] != INVALID_HW_RING_ID)) { + req.cos_rule = cpu_to_le16(vnic->fw_rss_cos_lb_ctx[1]); + req.enables |= cpu_to_le32(VNIC_CFG_REQ_ENABLES_COS_RULE); + } else { + req.cos_rule = cpu_to_le16(0xffff); + } + if (vnic->flags & BNXT_VNIC_RSS_FLAG) ring = 0; else if (vnic->flags & BNXT_VNIC_RFS_FLAG) ring = vnic_id - 1; + else if ((vnic_id == 1) && BNXT_CHIP_TYPE_NITRO_A0(bp)) + ring = bp->rx_nr_rings - 1; grp_idx = bp->rx_ring[ring].bnapi->index; req.vnic_id = cpu_to_le16(vnic->fw_vnic_id); @@ -3489,7 +3590,8 @@ static int bnxt_hwrm_vnic_alloc(struct bnxt *bp, u16 vnic_id, bp->grp_info[grp_idx].fw_grp_id; } - bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx = INVALID_HW_RING_ID; + bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[0] = INVALID_HW_RING_ID; + bp->vnic_info[vnic_id].fw_rss_cos_lb_ctx[1] = INVALID_HW_RING_ID; if (vnic_id == 0) req.flags = cpu_to_le32(VNIC_ALLOC_REQ_FLAGS_DEFAULT); @@ -3922,6 +4024,9 @@ static int bnxt_hwrm_stat_ctx_free(struct bnxt *bp) if (!bp->bnapi) return 0; + if (BNXT_CHIP_TYPE_NITRO_A0(bp)) + return 0; + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_FREE, -1, -1); mutex_lock(&bp->hwrm_cmd_lock); @@ -3950,6 +4055,9 @@ static int bnxt_hwrm_stat_ctx_alloc(struct bnxt *bp) struct hwrm_stat_ctx_alloc_input req = {0}; struct hwrm_stat_ctx_alloc_output *resp = bp->hwrm_cmd_resp_addr; + if (BNXT_CHIP_TYPE_NITRO_A0(bp)) + return 0; + bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_STAT_CTX_ALLOC, -1, -1); req.update_period_ms = cpu_to_le32(bp->stats_coal_ticks / 1000); @@ -4163,6 +4271,9 @@ static int bnxt_hwrm_ver_get(struct bnxt *bp) bp->hwrm_max_req_len = le16_to_cpu(resp->max_req_win_len); bp->chip_num = le16_to_cpu(resp->chip_num); + if (bp->chip_num == 
CHIP_NUM_58700 && !resp->chip_rev && + !resp->chip_metal) + bp->flags |= BNXT_FLAG_CHIP_NITRO_A0; hwrm_ver_get_exit: mutex_unlock(&bp->hwrm_cmd_lock); @@ -4252,7 +4363,7 @@ static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) int rc; /* allocate context for vnic */ - rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id); + rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id, 0); if (rc) { netdev_err(bp->dev, "hwrm vnic %d alloc failure rc: %x\n", vnic_id, rc); @@ -4260,6 +4371,16 @@ static int bnxt_setup_vnic(struct bnxt *bp, u16 vnic_id) } bp->rsscos_nr_ctxs++; + if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { + rc = bnxt_hwrm_vnic_ctx_alloc(bp, vnic_id, 1); + if (rc) { + netdev_err(bp->dev, "hwrm vnic %d cos ctx alloc failure rc: %x\n", + vnic_id, rc); + goto vnic_setup_err; + } + bp->rsscos_nr_ctxs++; + } + /* configure default vnic, ring grp */ rc = bnxt_hwrm_vnic_cfg(bp, vnic_id); if (rc) { @@ -4327,6 +4448,26 @@ static bool bnxt_promisc_ok(struct bnxt *bp) return true; } +static int bnxt_setup_nitroa0_vnic(struct bnxt *bp) +{ + unsigned int rc = 0; + + rc = bnxt_hwrm_vnic_alloc(bp, 1, bp->rx_nr_rings - 1, 1); + if (rc) { + netdev_err(bp->dev, "Cannot allocate special vnic for NS2 A0: %x\n", + rc); + return rc; + } + + rc = bnxt_hwrm_vnic_cfg(bp, 1); + if (rc) { + netdev_err(bp->dev, "Cannot allocate special vnic for NS2 A0: %x\n", + rc); + return rc; + } + return rc; +} + static int bnxt_cfg_rx_mode(struct bnxt *); static bool bnxt_mc_list_updated(struct bnxt *, u32 *); @@ -4334,6 +4475,7 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) { struct bnxt_vnic_info *vnic = &bp->vnic_info[0]; int rc = 0; + unsigned int rx_nr_rings = bp->rx_nr_rings; if (irq_re_init) { rc = bnxt_hwrm_stat_ctx_alloc(bp); @@ -4356,8 +4498,11 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) goto err_out; } + if (BNXT_CHIP_TYPE_NITRO_A0(bp)) + rx_nr_rings--; + /* default vnic 0 */ - rc = bnxt_hwrm_vnic_alloc(bp, 0, 0, bp->rx_nr_rings); + rc = bnxt_hwrm_vnic_alloc(bp, 0, 0, rx_nr_rings); 
if (rc) { netdev_err(bp->dev, "hwrm vnic alloc failure rc: %x\n", rc); goto err_out; @@ -4412,7 +4557,14 @@ static int bnxt_init_chip(struct bnxt *bp, bool irq_re_init) rc = bnxt_hwrm_set_coal(bp); if (rc) netdev_warn(bp->dev, "HWRM set coalescing failure rc: %x\n", - rc); + rc); + + if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { + rc = bnxt_setup_nitroa0_vnic(bp); + if (rc) + netdev_err(bp->dev, "Special vnic setup failure for NS2 A0 rc: %x\n", + rc); + } if (BNXT_VF(bp)) { bnxt_hwrm_func_qcfg(bp); @@ -4721,14 +4873,23 @@ static void bnxt_del_napi(struct bnxt *bp) static void bnxt_init_napi(struct bnxt *bp) { int i; + unsigned int cp_nr_rings = bp->cp_nr_rings; struct bnxt_napi *bnapi; if (bp->flags & BNXT_FLAG_USING_MSIX) { - for (i = 0; i < bp->cp_nr_rings; i++) { + if (BNXT_CHIP_TYPE_NITRO_A0(bp)) + cp_nr_rings--; + for (i = 0; i < cp_nr_rings; i++) { bnapi = bp->bnapi[i]; netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll, 64); } + if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { + bnapi = bp->bnapi[cp_nr_rings]; + netif_napi_add(bp->dev, &bnapi->napi, + bnxt_poll_nitroa0, 64); + napi_hash_add(&bnapi->napi); + } } else { bnapi = bp->bnapi[0]; netif_napi_add(bp->dev, &bnapi->napi, bnxt_poll, 64); @@ -4769,9 +4930,7 @@ static void bnxt_tx_disable(struct bnxt *bp) for (i = 0; i < bp->tx_nr_rings; i++) { txr = &bp->tx_ring[i]; txq = netdev_get_tx_queue(bp->dev, i); - __netif_tx_lock(txq, smp_processor_id()); txr->dev_state = BNXT_DEV_STATE_CLOSING; - __netif_tx_unlock(txq); } } /* Stop all TX queues */ @@ -5681,7 +5840,7 @@ static int bnxt_set_features(struct net_device *dev, netdev_features_t features) bool update_tpa = false; flags &= ~BNXT_FLAG_ALL_CONFIG_FEATS; - if ((features & NETIF_F_GRO) && (bp->pdev->revision > 0)) + if ((features & NETIF_F_GRO) && !BNXT_CHIP_TYPE_NITRO_A0(bp)) flags |= BNXT_FLAG_GRO; if (features & NETIF_F_LRO) flags |= BNXT_FLAG_LRO; @@ -6488,7 +6647,10 @@ static void _bnxt_get_max_rings(struct bnxt *bp, int *max_rx, int *max_tx, *max_cp = min_t(int, *max_cp, 
bp->pf.max_stat_ctxs); max_ring_grps = bp->pf.max_hw_ring_grps; } - + if (BNXT_CHIP_TYPE_NITRO_A0(bp) && BNXT_PF(bp)) { + *max_cp -= 1; + *max_rx -= 2; + } if (bp->flags & BNXT_FLAG_AGG_RINGS) *max_rx >>= 1; *max_rx = min_t(int, *max_rx, max_ring_grps); @@ -6524,6 +6686,10 @@ static int bnxt_set_dflt_rings(struct bnxt *bp) bp->cp_nr_rings = sh ? max_t(int, bp->tx_nr_rings, bp->rx_nr_rings) : bp->tx_nr_rings + bp->rx_nr_rings; bp->num_stat_ctxs = bp->cp_nr_rings; + if (BNXT_CHIP_TYPE_NITRO_A0(bp)) { + bp->rx_nr_rings++; + bp->cp_nr_rings++; + } return rc; } @@ -6550,6 +6716,9 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) struct bnxt *bp; int rc, max_irqs; + if (pdev->device == 0x16cd && pci_is_bridge(pdev)) + return -ENODEV; + if (version_printed++ == 0) pr_info("%s", version); @@ -6576,13 +6745,25 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) pci_set_drvdata(pdev, dev); + rc = bnxt_alloc_hwrm_resources(bp); + if (rc) + goto init_err; + + mutex_init(&bp->hwrm_cmd_lock); + rc = bnxt_hwrm_ver_get(bp); + if (rc) + goto init_err; + dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | NETIF_F_TSO | NETIF_F_TSO6 | NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE | NETIF_F_GSO_IPXIP4 | NETIF_F_GSO_UDP_TUNNEL_CSUM | NETIF_F_GSO_GRE_CSUM | NETIF_F_GSO_PARTIAL | NETIF_F_RXHASH | - NETIF_F_RXCSUM | NETIF_F_LRO | NETIF_F_GRO; + NETIF_F_RXCSUM | NETIF_F_GRO; + + if (!BNXT_CHIP_TYPE_NITRO_A0(bp)) + dev->hw_features |= NETIF_F_LRO; dev->hw_enc_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG | @@ -6601,15 +6782,6 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) #ifdef CONFIG_BNXT_SRIOV init_waitqueue_head(&bp->sriov_cfg_wait); #endif - rc = bnxt_alloc_hwrm_resources(bp); - if (rc) - goto init_err; - - mutex_init(&bp->hwrm_cmd_lock); - rc = bnxt_hwrm_ver_get(bp); - if (rc) - goto init_err; - bp->gro_func = bnxt_gro_func_5730x; if 
(BNXT_CHIP_NUM_57X1X(bp->chip_num)) bp->gro_func = bnxt_gro_func_5731x; @@ -6647,7 +6819,7 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) #endif bnxt_set_dflt_rings(bp); - if (BNXT_PF(bp)) { + if (BNXT_PF(bp) && !BNXT_CHIP_TYPE_NITRO_A0(bp)) { dev->hw_features |= NETIF_F_NTUPLE; if (bnxt_rfs_capable(bp)) { bp->flags |= BNXT_FLAG_RFS; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 2313e37..5307a2e 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -695,7 +695,8 @@ struct bnxt_ring_grp_info { struct bnxt_vnic_info { u16 fw_vnic_id; /* returned by Chimp during alloc */ - u16 fw_rss_cos_lb_ctx; +#define BNXT_MAX_CTX_PER_VNIC 2 + u16 fw_rss_cos_lb_ctx[BNXT_MAX_CTX_PER_VNIC]; u16 fw_l2_ctx_id; #define BNXT_MAX_UC_ADDRS 4 __le64 fw_l2_filter_id[BNXT_MAX_UC_ADDRS]; @@ -893,6 +894,7 @@ struct bnxt { #define CHIP_NUM_57301 0x16c8 #define CHIP_NUM_57302 0x16c9 #define CHIP_NUM_57304 0x16ca +#define CHIP_NUM_58700 0x16cd #define CHIP_NUM_57402 0x16d0 #define CHIP_NUM_57404 0x16d1 #define CHIP_NUM_57406 0x16d2 @@ -954,6 +956,7 @@ struct bnxt { #define BNXT_FLAG_SHARED_RINGS 0x200 #define BNXT_FLAG_PORT_STATS 0x400 #define BNXT_FLAG_EEE_CAP 0x1000 + #define BNXT_FLAG_CHIP_NITRO_A0 0x1000000 #define BNXT_FLAG_ALL_CONFIG_FEATS (BNXT_FLAG_TPA | \ BNXT_FLAG_RFS | \ @@ -963,6 +966,7 @@ struct bnxt { #define BNXT_VF(bp) ((bp)->flags & BNXT_FLAG_VF) #define BNXT_NPAR(bp) ((bp)->port_partition_type) #define BNXT_SINGLE_PF(bp) (BNXT_PF(bp) && !BNXT_NPAR(bp)) +#define BNXT_CHIP_TYPE_NITRO_A0(bp) ((bp)->flags & BNXT_FLAG_CHIP_NITRO_A0) struct bnxt_napi **bnapi; diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c index 0f7dd86..492c06b 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c @@ -362,9 +362,13 @@ static 
void bnxt_get_channels(struct net_device *dev, channel->max_other = 0; if (bp->flags & BNXT_FLAG_SHARED_RINGS) { channel->combined_count = bp->rx_nr_rings; + if (BNXT_CHIP_TYPE_NITRO_A0(bp)) + channel->combined_count--; } else { - channel->rx_count = bp->rx_nr_rings; - channel->tx_count = bp->tx_nr_rings_per_tc; + if (!BNXT_CHIP_TYPE_NITRO_A0(bp)) { + channel->rx_count = bp->rx_nr_rings; + channel->tx_count = bp->tx_nr_rings_per_tc; + } } } @@ -387,6 +391,10 @@ static int bnxt_set_channels(struct net_device *dev, (channel->rx_count || channel->tx_count)) return -EINVAL; + if (BNXT_CHIP_TYPE_NITRO_A0(bp) && (channel->rx_count || + channel->tx_count)) + return -EINVAL; + if (channel->combined_count) sh = true; diff --git a/drivers/net/ethernet/faraday/ftgmac100.c b/drivers/net/ethernet/faraday/ftgmac100.c index e7cf313..2d4c7ea 100644 --- a/drivers/net/ethernet/faraday/ftgmac100.c +++ b/drivers/net/ethernet/faraday/ftgmac100.c @@ -31,6 +31,7 @@ #include <linux/phy.h> #include <linux/platform_device.h> #include <net/ip.h> +#include <net/ncsi.h> #include "ftgmac100.h" @@ -68,10 +69,14 @@ struct ftgmac100 { struct net_device *netdev; struct device *dev; + struct ncsi_dev *ndev; struct napi_struct napi; struct mii_bus *mii_bus; int old_speed; + int int_mask_all; + bool use_ncsi; + bool enabled; }; static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, @@ -80,14 +85,6 @@ static int ftgmac100_alloc_rx_page(struct ftgmac100 *priv, /****************************************************************************** * internal functions (hardware register access) *****************************************************************************/ -#define INT_MASK_ALL_ENABLED (FTGMAC100_INT_RPKT_LOST | \ - FTGMAC100_INT_XPKT_ETH | \ - FTGMAC100_INT_XPKT_LOST | \ - FTGMAC100_INT_AHB_ERR | \ - FTGMAC100_INT_PHYSTS_CHG | \ - FTGMAC100_INT_RPKT_BUF | \ - FTGMAC100_INT_NO_RXBUF) - static void ftgmac100_set_rx_ring_base(struct ftgmac100 *priv, dma_addr_t addr) { iowrite32(addr, priv->base 
+ FTGMAC100_OFFSET_RXR_BADR); @@ -141,6 +138,64 @@ static void ftgmac100_set_mac(struct ftgmac100 *priv, const unsigned char *mac) iowrite32(laddr, priv->base + FTGMAC100_OFFSET_MAC_LADR); } +static void ftgmac100_setup_mac(struct ftgmac100 *priv) +{ + u8 mac[ETH_ALEN]; + unsigned int m; + unsigned int l; + void *addr; + + addr = device_get_mac_address(priv->dev, mac, ETH_ALEN); + if (addr) { + ether_addr_copy(priv->netdev->dev_addr, mac); + dev_info(priv->dev, "Read MAC address %pM from device tree\n", + mac); + return; + } + + m = ioread32(priv->base + FTGMAC100_OFFSET_MAC_MADR); + l = ioread32(priv->base + FTGMAC100_OFFSET_MAC_LADR); + + mac[0] = (m >> 8) & 0xff; + mac[1] = m & 0xff; + mac[2] = (l >> 24) & 0xff; + mac[3] = (l >> 16) & 0xff; + mac[4] = (l >> 8) & 0xff; + mac[5] = l & 0xff; + + if (!is_valid_ether_addr(mac)) { + mac[5] = (m >> 8) & 0xff; + mac[4] = m & 0xff; + mac[3] = (l >> 24) & 0xff; + mac[2] = (l >> 16) & 0xff; + mac[1] = (l >> 8) & 0xff; + mac[0] = l & 0xff; + } + + if (is_valid_ether_addr(mac)) { + ether_addr_copy(priv->netdev->dev_addr, mac); + dev_info(priv->dev, "Read MAC address %pM from chip\n", mac); + } else { + eth_hw_addr_random(priv->netdev); + dev_info(priv->dev, "Generated random MAC address %pM\n", + priv->netdev->dev_addr); + } +} + +static int ftgmac100_set_mac_addr(struct net_device *dev, void *p) +{ + int ret; + + ret = eth_prepare_mac_addr_change(dev, p); + if (ret < 0) + return ret; + + eth_commit_mac_addr_change(dev, p); + ftgmac100_set_mac(netdev_priv(dev), dev->dev_addr); + + return 0; +} + static void ftgmac100_init_hw(struct ftgmac100 *priv) { /* setup ring buffer base registers */ @@ -952,7 +1007,10 @@ static irqreturn_t ftgmac100_interrupt(int irq, void *dev_id) struct net_device *netdev = dev_id; struct ftgmac100 *priv = netdev_priv(netdev); - if (likely(netif_running(netdev))) { + /* When running in NCSI mode, the interface should be ready for + * receiving or transmitting NCSI packets before it's opened. 
+ */ + if (likely(priv->use_ncsi || netif_running(netdev))) { /* Disable interrupts for polling */ iowrite32(0, priv->base + FTGMAC100_OFFSET_IER); napi_schedule(&priv->napi); @@ -1005,8 +1063,9 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) ftgmac100_tx_complete(priv); } - if (status & (FTGMAC100_INT_NO_RXBUF | FTGMAC100_INT_RPKT_LOST | - FTGMAC100_INT_AHB_ERR | FTGMAC100_INT_PHYSTS_CHG)) { + if (status & priv->int_mask_all & (FTGMAC100_INT_NO_RXBUF | + FTGMAC100_INT_RPKT_LOST | FTGMAC100_INT_AHB_ERR | + FTGMAC100_INT_PHYSTS_CHG)) { if (net_ratelimit()) netdev_info(netdev, "[ISR] = 0x%x: %s%s%s%s\n", status, status & FTGMAC100_INT_NO_RXBUF ? "NO_RXBUF " : "", @@ -1029,7 +1088,8 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget) napi_complete(napi); /* enable all interrupts */ - iowrite32(INT_MASK_ALL_ENABLED, priv->base + FTGMAC100_OFFSET_IER); + iowrite32(priv->int_mask_all, + priv->base + FTGMAC100_OFFSET_IER); } return rx; @@ -1065,17 +1125,33 @@ static int ftgmac100_open(struct net_device *netdev) goto err_hw; ftgmac100_init_hw(priv); - ftgmac100_start_hw(priv, 10); - - phy_start(netdev->phydev); + ftgmac100_start_hw(priv, priv->use_ncsi ? 
100 : 10); + if (netdev->phydev) + phy_start(netdev->phydev); + else if (priv->use_ncsi) + netif_carrier_on(netdev); napi_enable(&priv->napi); netif_start_queue(netdev); /* enable all interrupts */ - iowrite32(INT_MASK_ALL_ENABLED, priv->base + FTGMAC100_OFFSET_IER); + iowrite32(priv->int_mask_all, priv->base + FTGMAC100_OFFSET_IER); + + /* Start the NCSI device */ + if (priv->use_ncsi) { + err = ncsi_start_dev(priv->ndev); + if (err) + goto err_ncsi; + } + + priv->enabled = true; + return 0; +err_ncsi: + napi_disable(&priv->napi); + netif_stop_queue(netdev); + iowrite32(0, priv->base + FTGMAC100_OFFSET_IER); err_hw: free_irq(priv->irq, netdev); err_irq: @@ -1088,12 +1164,17 @@ static int ftgmac100_stop(struct net_device *netdev) { struct ftgmac100 *priv = netdev_priv(netdev); + if (!priv->enabled) + return 0; + /* disable all interrupts */ + priv->enabled = false; iowrite32(0, priv->base + FTGMAC100_OFFSET_IER); netif_stop_queue(netdev); napi_disable(&priv->napi); - phy_stop(netdev->phydev); + if (netdev->phydev) + phy_stop(netdev->phydev); ftgmac100_stop_hw(priv); free_irq(priv->irq, netdev); @@ -1134,6 +1215,9 @@ static int ftgmac100_hard_start_xmit(struct sk_buff *skb, /* optional */ static int ftgmac100_do_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd) { + if (!netdev->phydev) + return -ENXIO; + return phy_mii_ioctl(netdev->phydev, ifr, cmd); } @@ -1141,11 +1225,74 @@ static const struct net_device_ops ftgmac100_netdev_ops = { .ndo_open = ftgmac100_open, .ndo_stop = ftgmac100_stop, .ndo_start_xmit = ftgmac100_hard_start_xmit, - .ndo_set_mac_address = eth_mac_addr, + .ndo_set_mac_address = ftgmac100_set_mac_addr, .ndo_validate_addr = eth_validate_addr, .ndo_do_ioctl = ftgmac100_do_ioctl, }; +static int ftgmac100_setup_mdio(struct net_device *netdev) +{ + struct ftgmac100 *priv = netdev_priv(netdev); + struct platform_device *pdev = to_platform_device(priv->dev); + int i, err = 0; + + /* initialize mdio bus */ + priv->mii_bus = mdiobus_alloc(); + 
if (!priv->mii_bus) + return -EIO; + + priv->mii_bus->name = "ftgmac100_mdio"; + snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "%s-%d", + pdev->name, pdev->id); + priv->mii_bus->priv = priv->netdev; + priv->mii_bus->read = ftgmac100_mdiobus_read; + priv->mii_bus->write = ftgmac100_mdiobus_write; + + for (i = 0; i < PHY_MAX_ADDR; i++) + priv->mii_bus->irq[i] = PHY_POLL; + + err = mdiobus_register(priv->mii_bus); + if (err) { + dev_err(priv->dev, "Cannot register MDIO bus!\n"); + goto err_register_mdiobus; + } + + err = ftgmac100_mii_probe(priv); + if (err) { + dev_err(priv->dev, "MII Probe failed!\n"); + goto err_mii_probe; + } + + return 0; + +err_mii_probe: + mdiobus_unregister(priv->mii_bus); +err_register_mdiobus: + mdiobus_free(priv->mii_bus); + return err; +} + +static void ftgmac100_destroy_mdio(struct net_device *netdev) +{ + struct ftgmac100 *priv = netdev_priv(netdev); + + if (!netdev->phydev) + return; + + phy_disconnect(netdev->phydev); + mdiobus_unregister(priv->mii_bus); + mdiobus_free(priv->mii_bus); +} + +static void ftgmac100_ncsi_handler(struct ncsi_dev *nd) +{ + if (unlikely(nd->state != ncsi_dev_state_functional)) + return; + + netdev_info(nd->dev, "NCSI interface %s\n", + nd->link_up ? 
"up" : "down"); +} + /****************************************************************************** * struct platform_driver functions *****************************************************************************/ @@ -1155,7 +1302,7 @@ static int ftgmac100_probe(struct platform_device *pdev) int irq; struct net_device *netdev; struct ftgmac100 *priv; - int err; + int err = 0; if (!pdev) return -ENODEV; @@ -1179,7 +1326,6 @@ static int ftgmac100_probe(struct platform_device *pdev) netdev->ethtool_ops = &ftgmac100_ethtool_ops; netdev->netdev_ops = &ftgmac100_netdev_ops; - netdev->features = NETIF_F_IP_CSUM | NETIF_F_GRO; platform_set_drvdata(pdev, netdev); @@ -1211,31 +1357,45 @@ static int ftgmac100_probe(struct platform_device *pdev) priv->irq = irq; - /* initialize mdio bus */ - priv->mii_bus = mdiobus_alloc(); - if (!priv->mii_bus) { - err = -EIO; - goto err_alloc_mdiobus; - } - - priv->mii_bus->name = "ftgmac100_mdio"; - snprintf(priv->mii_bus->id, MII_BUS_ID_SIZE, "ftgmac100_mii"); - - priv->mii_bus->priv = netdev; - priv->mii_bus->read = ftgmac100_mdiobus_read; - priv->mii_bus->write = ftgmac100_mdiobus_write; + /* MAC address from chip or random one */ + ftgmac100_setup_mac(priv); + + priv->int_mask_all = (FTGMAC100_INT_RPKT_LOST | + FTGMAC100_INT_XPKT_ETH | + FTGMAC100_INT_XPKT_LOST | + FTGMAC100_INT_AHB_ERR | + FTGMAC100_INT_PHYSTS_CHG | + FTGMAC100_INT_RPKT_BUF | + FTGMAC100_INT_NO_RXBUF); + if (pdev->dev.of_node && + of_get_property(pdev->dev.of_node, "use-ncsi", NULL)) { + if (!IS_ENABLED(CONFIG_NET_NCSI)) { + dev_err(&pdev->dev, "NCSI stack not enabled\n"); + goto err_ncsi_dev; + } - err = mdiobus_register(priv->mii_bus); - if (err) { - dev_err(&pdev->dev, "Cannot register MDIO bus!\n"); - goto err_register_mdiobus; + dev_info(&pdev->dev, "Using NCSI interface\n"); + priv->use_ncsi = true; + priv->int_mask_all &= ~FTGMAC100_INT_PHYSTS_CHG; + priv->ndev = ncsi_register_dev(netdev, ftgmac100_ncsi_handler); + if (!priv->ndev) + goto err_ncsi_dev; + } else 
{ + priv->use_ncsi = false; + err = ftgmac100_setup_mdio(netdev); + if (err) + goto err_setup_mdio; } - err = ftgmac100_mii_probe(priv); - if (err) { - dev_err(&pdev->dev, "MII Probe failed!\n"); - goto err_mii_probe; - } + /* We have to disable on-chip IP checksum functionality + * when NCSI is enabled on the interface. It doesn't work + * in that case. + */ + netdev->features = NETIF_F_IP_CSUM | NETIF_F_GRO; + if (priv->use_ncsi && + of_get_property(pdev->dev.of_node, "no-hw-checksum", NULL)) + netdev->features &= ~NETIF_F_IP_CSUM; + /* register network device */ err = register_netdev(netdev); @@ -1246,21 +1406,12 @@ static int ftgmac100_probe(struct platform_device *pdev) netdev_info(netdev, "irq %d, mapped at %p\n", priv->irq, priv->base); - if (!is_valid_ether_addr(netdev->dev_addr)) { - eth_hw_addr_random(netdev); - netdev_info(netdev, "generated random MAC address %pM\n", - netdev->dev_addr); - } - return 0; +err_ncsi_dev: err_register_netdev: - phy_disconnect(netdev->phydev); -err_mii_probe: - mdiobus_unregister(priv->mii_bus); -err_register_mdiobus: - mdiobus_free(priv->mii_bus); -err_alloc_mdiobus: + ftgmac100_destroy_mdio(netdev); +err_setup_mdio: iounmap(priv->base); err_ioremap: release_resource(priv->res); @@ -1280,10 +1431,7 @@ static int __exit ftgmac100_remove(struct platform_device *pdev) priv = netdev_priv(netdev); unregister_netdev(netdev); - - phy_disconnect(netdev->phydev); - mdiobus_unregister(priv->mii_bus); - mdiobus_free(priv->mii_bus); + ftgmac100_destroy_mdio(netdev); iounmap(priv->base); release_resource(priv->res); @@ -1293,14 +1441,20 @@ static int __exit ftgmac100_remove(struct platform_device *pdev) return 0; } +static const struct of_device_id ftgmac100_of_match[] = { + { .compatible = "faraday,ftgmac100" }, + { } +}; +MODULE_DEVICE_TABLE(of, ftgmac100_of_match); + static struct platform_driver ftgmac100_driver = { - .probe = ftgmac100_probe, - .remove = __exit_p(ftgmac100_remove), - .driver = { - .name = DRV_NAME, + .probe = 
ftgmac100_probe, + .remove = __exit_p(ftgmac100_remove), + .driver = { + .name = DRV_NAME, + .of_match_table = ftgmac100_of_match, }, }; - module_platform_driver(ftgmac100_driver); MODULE_AUTHOR("Po-Yu Chuang <ratbert@faraday-tech.com>"); diff --git a/drivers/net/ethernet/marvell/pxa168_eth.c b/drivers/net/ethernet/marvell/pxa168_eth.c index 54d5154..aeeb2e7 100644 --- a/drivers/net/ethernet/marvell/pxa168_eth.c +++ b/drivers/net/ethernet/marvell/pxa168_eth.c @@ -247,7 +247,6 @@ struct pxa168_eth_private { */ struct timer_list timeout; struct mii_bus *smi_bus; - struct phy_device *phy; /* clock */ struct clk *clk; @@ -275,8 +274,8 @@ enum hash_table_entry { HASH_ENTRY_RECEIVE_DISCARD_BIT = 2 }; -static int pxa168_get_settings(struct net_device *dev, struct ethtool_cmd *cmd); -static int pxa168_set_settings(struct net_device *dev, struct ethtool_cmd *cmd); +static int pxa168_get_link_ksettings(struct net_device *dev, + struct ethtool_link_ksettings *cmd); static int pxa168_init_hw(struct pxa168_eth_private *pep); static int pxa168_init_phy(struct net_device *dev); static void eth_port_reset(struct net_device *dev); @@ -644,7 +643,7 @@ static void eth_port_start(struct net_device *dev) struct pxa168_eth_private *pep = netdev_priv(dev); int tx_curr_desc, rx_curr_desc; - phy_start(pep->phy); + phy_start(dev->phydev); /* Assignment of Tx CTRP of given queue */ tx_curr_desc = pep->tx_curr_desc_q; @@ -700,7 +699,7 @@ static void eth_port_reset(struct net_device *dev) val &= ~PCR_EN; wrl(pep, PORT_CONFIG, val); - phy_stop(pep->phy); + phy_stop(dev->phydev); } /* @@ -943,7 +942,7 @@ static int set_port_config_ext(struct pxa168_eth_private *pep) static void pxa168_eth_adjust_link(struct net_device *dev) { struct pxa168_eth_private *pep = netdev_priv(dev); - struct phy_device *phy = pep->phy; + struct phy_device *phy = dev->phydev; u32 cfg, cfg_o = rdl(pep, PORT_CONFIG); u32 cfgext, cfgext_o = rdl(pep, PORT_CONFIG_EXT); @@ -972,35 +971,37 @@ static void 
pxa168_eth_adjust_link(struct net_device *dev) static int pxa168_init_phy(struct net_device *dev) { struct pxa168_eth_private *pep = netdev_priv(dev); - struct ethtool_cmd cmd; + struct ethtool_link_ksettings cmd; + struct phy_device *phy = NULL; int err; - if (pep->phy) + if (dev->phydev) return 0; - pep->phy = mdiobus_scan(pep->smi_bus, pep->phy_addr); - if (IS_ERR(pep->phy)) - return PTR_ERR(pep->phy); + phy = mdiobus_scan(pep->smi_bus, pep->phy_addr); + if (IS_ERR(phy)) + return PTR_ERR(phy); - err = phy_connect_direct(dev, pep->phy, pxa168_eth_adjust_link, + err = phy_connect_direct(dev, phy, pxa168_eth_adjust_link, pep->phy_intf); if (err) return err; - err = pxa168_get_settings(dev, &cmd); + err = pxa168_get_link_ksettings(dev, &cmd); if (err) return err; - cmd.phy_address = pep->phy_addr; - cmd.speed = pep->phy_speed; - cmd.duplex = pep->phy_duplex; - cmd.advertising = PHY_BASIC_FEATURES; - cmd.autoneg = AUTONEG_ENABLE; + cmd.base.phy_address = pep->phy_addr; + cmd.base.speed = pep->phy_speed; + cmd.base.duplex = pep->phy_duplex; + ethtool_convert_legacy_u32_to_link_mode(cmd.link_modes.advertising, + PHY_BASIC_FEATURES); + cmd.base.autoneg = AUTONEG_ENABLE; - if (cmd.speed != 0) - cmd.autoneg = AUTONEG_DISABLE; + if (cmd.base.speed != 0) + cmd.base.autoneg = AUTONEG_DISABLE; - return pxa168_set_settings(dev, &cmd); + return phy_ethtool_set_link_ksettings(dev, &cmd); } static int pxa168_init_hw(struct pxa168_eth_private *pep) @@ -1366,32 +1367,24 @@ static int pxa168_smi_write(struct mii_bus *bus, int phy_addr, int regnum, static int pxa168_eth_do_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { - struct pxa168_eth_private *pep = netdev_priv(dev); - if (pep->phy != NULL) - return phy_mii_ioctl(pep->phy, ifr, cmd); + if (dev->phydev != NULL) + return phy_mii_ioctl(dev->phydev, ifr, cmd); return -EOPNOTSUPP; } -static int pxa168_get_settings(struct net_device *dev, struct ethtool_cmd *cmd) +static int pxa168_get_link_ksettings(struct net_device 
*dev, + struct ethtool_link_ksettings *cmd) { - struct pxa168_eth_private *pep = netdev_priv(dev); int err; - err = phy_read_status(pep->phy); + err = phy_read_status(dev->phydev); if (err == 0) - err = phy_ethtool_gset(pep->phy, cmd); + err = phy_ethtool_ksettings_get(dev->phydev, cmd); return err; } -static int pxa168_set_settings(struct net_device *dev, struct ethtool_cmd *cmd) -{ - struct pxa168_eth_private *pep = netdev_priv(dev); - - return phy_ethtool_sset(pep->phy, cmd); -} - static void pxa168_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo *info) { @@ -1402,11 +1395,11 @@ static void pxa168_get_drvinfo(struct net_device *dev, } static const struct ethtool_ops pxa168_ethtool_ops = { - .get_settings = pxa168_get_settings, - .set_settings = pxa168_set_settings, .get_drvinfo = pxa168_get_drvinfo, .get_link = ethtool_op_get_link, .get_ts_info = ethtool_op_get_ts_info, + .get_link_ksettings = pxa168_get_link_ksettings, + .set_link_ksettings = phy_ethtool_set_link_ksettings, }; static const struct net_device_ops pxa168_eth_netdev_ops = { @@ -1569,8 +1562,8 @@ static int pxa168_eth_remove(struct platform_device *pdev) pep->htpr, pep->htpr_dma); pep->htpr = NULL; } - if (pep->phy) - phy_disconnect(pep->phy); + if (dev->phydev) + phy_disconnect(dev->phydev); if (pep->clk) { clk_disable_unprepare(pep->clk); } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c index 51a2e82..f32e272 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c @@ -1722,6 +1722,12 @@ static int mlx4_en_set_channels(struct net_device *dev, !channel->tx_count || !channel->rx_count) return -EINVAL; + if (channel->tx_count * MLX4_EN_NUM_UP <= priv->xdp_ring_num) { + en_err(priv, "Minimum %d tx channels required with XDP on\n", + priv->xdp_ring_num / MLX4_EN_NUM_UP + 1); + return -EINVAL; + } + mutex_lock(&mdev->state_lock); if (priv->port_up) { port_up = 1; @@ 
-1740,7 +1746,8 @@ static int mlx4_en_set_channels(struct net_device *dev, goto out; } - netif_set_real_num_tx_queues(dev, priv->tx_ring_num); + netif_set_real_num_tx_queues(dev, priv->tx_ring_num - + priv->xdp_ring_num); netif_set_real_num_rx_queues(dev, priv->rx_ring_num); if (dev->num_tc) diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 6083775..9abbba6 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -31,6 +31,7 @@ * */ +#include <linux/bpf.h> #include <linux/etherdevice.h> #include <linux/tcp.h> #include <linux/if_vlan.h> @@ -1521,6 +1522,24 @@ static void mlx4_en_free_affinity_hint(struct mlx4_en_priv *priv, int ring_idx) free_cpumask_var(priv->rx_ring[ring_idx]->affinity_mask); } +static void mlx4_en_init_recycle_ring(struct mlx4_en_priv *priv, + int tx_ring_idx) +{ + struct mlx4_en_tx_ring *tx_ring = priv->tx_ring[tx_ring_idx]; + int rr_index; + + rr_index = (priv->xdp_ring_num - priv->tx_ring_num) + tx_ring_idx; + if (rr_index >= 0) { + tx_ring->free_tx_desc = mlx4_en_recycle_tx_desc; + tx_ring->recycle_ring = priv->rx_ring[rr_index]; + en_dbg(DRV, priv, + "Set tx_ring[%d]->recycle_ring = rx_ring[%d]\n", + tx_ring_idx, rr_index); + } else { + tx_ring->recycle_ring = NULL; + } +} + int mlx4_en_start_port(struct net_device *dev) { struct mlx4_en_priv *priv = netdev_priv(dev); @@ -1643,6 +1662,8 @@ int mlx4_en_start_port(struct net_device *dev) } tx_ring->tx_queue = netdev_get_tx_queue(dev, i); + mlx4_en_init_recycle_ring(priv, i); + /* Arm CQ for TX completions */ mlx4_en_arm_cq(priv, cq); @@ -2112,6 +2133,11 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) en_err(priv, "Bad MTU size:%d.\n", new_mtu); return -EPERM; } + if (priv->xdp_ring_num && MLX4_EN_EFF_MTU(new_mtu) > FRAG_SZ0) { + en_err(priv, "MTU size:%d requires frags but XDP running\n", + new_mtu); + return -EOPNOTSUPP; + } dev->mtu = new_mtu; if 
(netif_running(dev)) { @@ -2520,6 +2546,103 @@ static int mlx4_en_set_tx_maxrate(struct net_device *dev, int queue_index, u32 m return err; } +static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + struct mlx4_en_dev *mdev = priv->mdev; + struct bpf_prog *old_prog; + int xdp_ring_num; + int port_up = 0; + int err; + int i; + + xdp_ring_num = prog ? ALIGN(priv->rx_ring_num, MLX4_EN_NUM_UP) : 0; + + /* No need to reconfigure buffers when simply swapping the + * program for a new one. + */ + if (priv->xdp_ring_num == xdp_ring_num) { + if (prog) { + prog = bpf_prog_add(prog, priv->rx_ring_num - 1); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + for (i = 0; i < priv->rx_ring_num; i++) { + /* This xchg is paired with READ_ONCE in the fastpath */ + old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } + return 0; + } + + if (priv->num_frags > 1) { + en_err(priv, "Cannot set XDP if MTU requires multiple frags\n"); + return -EOPNOTSUPP; + } + + if (priv->tx_ring_num < xdp_ring_num + MLX4_EN_NUM_UP) { + en_err(priv, + "Minimum %d tx channels required to run XDP\n", + (xdp_ring_num + MLX4_EN_NUM_UP) / MLX4_EN_NUM_UP); + return -EINVAL; + } + + if (prog) { + prog = bpf_prog_add(prog, priv->rx_ring_num - 1); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + + mutex_lock(&mdev->state_lock); + if (priv->port_up) { + port_up = 1; + mlx4_en_stop_port(dev, 1); + } + + priv->xdp_ring_num = xdp_ring_num; + netif_set_real_num_tx_queues(dev, priv->tx_ring_num - + priv->xdp_ring_num); + + for (i = 0; i < priv->rx_ring_num; i++) { + old_prog = xchg(&priv->rx_ring[i]->xdp_prog, prog); + if (old_prog) + bpf_prog_put(old_prog); + } + + if (port_up) { + err = mlx4_en_start_port(dev); + if (err) { + en_err(priv, "Failed starting port %d for XDP change\n", + priv->port); + queue_work(mdev->workqueue, &priv->watchdog_task); + } + } + + mutex_unlock(&mdev->state_lock); + return 0; 
+} + +static bool mlx4_xdp_attached(struct net_device *dev) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + + return !!priv->xdp_ring_num; +} + +static int mlx4_xdp(struct net_device *dev, struct netdev_xdp *xdp) +{ + switch (xdp->command) { + case XDP_SETUP_PROG: + return mlx4_xdp_set(dev, xdp->prog); + case XDP_QUERY_PROG: + xdp->prog_attached = mlx4_xdp_attached(dev); + return 0; + default: + return -EINVAL; + } +} + static const struct net_device_ops mlx4_netdev_ops = { .ndo_open = mlx4_en_open, .ndo_stop = mlx4_en_close, @@ -2548,6 +2671,7 @@ static const struct net_device_ops mlx4_netdev_ops = { .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, + .ndo_xdp = mlx4_xdp, }; static const struct net_device_ops mlx4_netdev_ops_master = { @@ -2584,6 +2708,7 @@ static const struct net_device_ops mlx4_netdev_ops_master = { .ndo_udp_tunnel_del = mlx4_en_del_vxlan_port, .ndo_features_check = mlx4_en_features_check, .ndo_set_tx_maxrate = mlx4_en_set_tx_maxrate, + .ndo_xdp = mlx4_xdp, }; struct mlx4_en_bond { diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c index c1b3a9c..11d88c8 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c @@ -32,6 +32,7 @@ */ #include <net/busy_poll.h> +#include <linux/bpf.h> #include <linux/mlx4/cq.h> #include <linux/slab.h> #include <linux/mlx4/qp.h> @@ -57,7 +58,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, struct page *page; dma_addr_t dma; - for (order = MLX4_EN_ALLOC_PREFER_ORDER; ;) { + for (order = frag_info->order; ;) { gfp_t gfp = _gfp; if (order) @@ -70,7 +71,7 @@ static int mlx4_alloc_pages(struct mlx4_en_priv *priv, return -ENOMEM; } dma = dma_map_page(priv->ddev, page, 0, PAGE_SIZE << order, - PCI_DMA_FROMDEVICE); + frag_info->dma_dir); if (dma_mapping_error(priv->ddev, dma)) { put_page(page); return -ENOMEM; @@ -124,7 +125,8 
@@ out: while (i--) { if (page_alloc[i].page != ring_alloc[i].page) { dma_unmap_page(priv->ddev, page_alloc[i].dma, - page_alloc[i].page_size, PCI_DMA_FROMDEVICE); + page_alloc[i].page_size, + priv->frag_info[i].dma_dir); page = page_alloc[i].page; /* Revert changes done by mlx4_alloc_pages */ page_ref_sub(page, page_alloc[i].page_size / @@ -145,7 +147,7 @@ static void mlx4_en_free_frag(struct mlx4_en_priv *priv, if (next_frag_end > frags[i].page_size) dma_unmap_page(priv->ddev, frags[i].dma, frags[i].page_size, - PCI_DMA_FROMDEVICE); + frag_info->dma_dir); if (frags[i].page) put_page(frags[i].page); @@ -176,7 +178,8 @@ out: page_alloc = &ring->page_alloc[i]; dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, PCI_DMA_FROMDEVICE); + page_alloc->page_size, + priv->frag_info[i].dma_dir); page = page_alloc->page; /* Revert changes done by mlx4_alloc_pages */ page_ref_sub(page, page_alloc->page_size / @@ -201,7 +204,7 @@ static void mlx4_en_destroy_allocator(struct mlx4_en_priv *priv, i, page_count(page_alloc->page)); dma_unmap_page(priv->ddev, page_alloc->dma, - page_alloc->page_size, PCI_DMA_FROMDEVICE); + page_alloc->page_size, frag_info->dma_dir); while (page_alloc->page_offset + frag_info->frag_stride < page_alloc->page_size) { put_page(page_alloc->page); @@ -244,6 +247,12 @@ static int mlx4_en_prepare_rx_desc(struct mlx4_en_priv *priv, struct mlx4_en_rx_alloc *frags = ring->rx_info + (index << priv->log_rx_info); + if (ring->page_cache.index > 0) { + frags[0] = ring->page_cache.buf[--ring->page_cache.index]; + rx_desc->data[0].addr = cpu_to_be64(frags[0].dma); + return 0; + } + return mlx4_en_alloc_frags(priv, rx_desc, frags, ring->page_alloc, gfp); } @@ -502,6 +511,24 @@ void mlx4_en_recover_from_oom(struct mlx4_en_priv *priv) } } +/* When the rx ring is running in page-per-packet mode, a released frame can go + * directly into a small cache, to avoid unmapping or touching the page + * allocator. 
In bpf prog performance scenarios, buffers are either forwarded + * or dropped, never converted to skbs, so every page can come directly from + * this cache when it is sized to be a multiple of the napi budget. + */ +bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_alloc *frame) +{ + struct mlx4_en_page_cache *cache = &ring->page_cache; + + if (cache->index >= MLX4_EN_CACHE_SIZE) + return false; + + cache->buf[cache->index++] = *frame; + return true; +} + void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring **pring, u32 size, u16 stride) @@ -509,6 +536,8 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_dev *mdev = priv->mdev; struct mlx4_en_rx_ring *ring = *pring; + if (ring->xdp_prog) + bpf_prog_put(ring->xdp_prog); mlx4_free_hwq_res(mdev->dev, &ring->wqres, size * stride + TXBB_SIZE); vfree(ring->rx_info); ring->rx_info = NULL; @@ -522,6 +551,16 @@ void mlx4_en_destroy_rx_ring(struct mlx4_en_priv *priv, void mlx4_en_deactivate_rx_ring(struct mlx4_en_priv *priv, struct mlx4_en_rx_ring *ring) { + int i; + + for (i = 0; i < ring->page_cache.index; i++) { + struct mlx4_en_rx_alloc *frame = &ring->page_cache.buf[i]; + + dma_unmap_page(priv->ddev, frame->dma, frame->page_size, + priv->frag_info[0].dma_dir); + put_page(frame->page); + } + ring->page_cache.index = 0; mlx4_en_free_rx_buf(priv, ring); if (ring->stride <= TXBB_SIZE) ring->buf -= TXBB_SIZE; @@ -743,7 +782,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud struct mlx4_en_rx_ring *ring = priv->rx_ring[cq->ring]; struct mlx4_en_rx_alloc *frags; struct mlx4_en_rx_desc *rx_desc; + struct bpf_prog *xdp_prog; + int doorbell_pending; struct sk_buff *skb; + int tx_index; int index; int nr; unsigned int length; @@ -759,6 +801,10 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud if (budget <= 0) return polled; + xdp_prog = READ_ONCE(ring->xdp_prog); + doorbell_pending = 0; + 
tx_index = (priv->tx_ring_num - priv->xdp_ring_num) + cq->ring; + /* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx * descriptor offset can be deduced from the CQE index instead of * reading 'cqe->index' */ @@ -835,6 +881,43 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud l2_tunnel = (dev->hw_enc_features & NETIF_F_RXCSUM) && (cqe->vlan_my_qpn & cpu_to_be32(MLX4_CQE_L2_TUNNEL)); + /* A bpf program gets first chance to drop the packet. It may + * read bytes but not past the end of the frag. + */ + if (xdp_prog) { + struct xdp_buff xdp; + dma_addr_t dma; + u32 act; + + dma = be64_to_cpu(rx_desc->data[0].addr); + dma_sync_single_for_cpu(priv->ddev, dma, + priv->frag_info[0].frag_size, + DMA_FROM_DEVICE); + + xdp.data = page_address(frags[0].page) + + frags[0].page_offset; + xdp.data_end = xdp.data + length; + + act = bpf_prog_run_xdp(xdp_prog, &xdp); + switch (act) { + case XDP_PASS: + break; + case XDP_TX: + if (!mlx4_en_xmit_frame(frags, dev, + length, tx_index, + &doorbell_pending)) + goto consumed; + break; + default: + bpf_warn_invalid_xdp_action(act); + case XDP_ABORTED: + case XDP_DROP: + if (mlx4_en_rx_recycle(ring, frags)) + goto consumed; + goto next; + } + } + if (likely(dev->features & NETIF_F_RXCSUM)) { if (cqe->status & cpu_to_be16(MLX4_CQE_STATUS_TCP | MLX4_CQE_STATUS_UDP)) { @@ -986,6 +1069,7 @@ next: for (nr = 0; nr < priv->num_frags; nr++) mlx4_en_free_frag(priv, frags, nr); +consumed: ++cq->mcq.cons_index; index = (cq->mcq.cons_index) & ring->size_mask; cqe = mlx4_en_get_cqe(cq->buf, index, priv->cqe_size) + factor; @@ -994,6 +1078,9 @@ next: } out: + if (doorbell_pending) + mlx4_en_xmit_doorbell(priv->tx_ring[tx_index]); + AVG_PERF_COUNTER(priv->pstats.rx_coal_avg, polled); mlx4_cq_set_ci(&cq->mcq); wmb(); /* ensure HW sees CQ consumer before we post new buffers */ @@ -1061,22 +1148,35 @@ static const int frag_sizes[] = { void mlx4_en_calc_rx_buf(struct net_device *dev) { + enum dma_data_direction 
dma_dir = PCI_DMA_FROMDEVICE; struct mlx4_en_priv *priv = netdev_priv(dev); - /* VLAN_HLEN is added twice,to support skb vlan tagged with multiple - * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). - */ - int eff_mtu = dev->mtu + ETH_HLEN + (2 * VLAN_HLEN); + int eff_mtu = MLX4_EN_EFF_MTU(dev->mtu); + int order = MLX4_EN_ALLOC_PREFER_ORDER; + u32 align = SMP_CACHE_BYTES; int buf_size = 0; int i = 0; + /* bpf requires buffers to be set up as 1 packet per page. + * This only works when num_frags == 1. + */ + if (priv->xdp_ring_num) { + dma_dir = PCI_DMA_BIDIRECTIONAL; + /* This will gain efficient xdp frame recycling at the expense + * of more costly truesize accounting + */ + align = PAGE_SIZE; + order = 0; + } + while (buf_size < eff_mtu) { + priv->frag_info[i].order = order; priv->frag_info[i].frag_size = (eff_mtu > buf_size + frag_sizes[i]) ? frag_sizes[i] : eff_mtu - buf_size; priv->frag_info[i].frag_prefix_size = buf_size; priv->frag_info[i].frag_stride = - ALIGN(priv->frag_info[i].frag_size, - SMP_CACHE_BYTES); + ALIGN(priv->frag_info[i].frag_size, align); + priv->frag_info[i].dma_dir = dma_dir; buf_size += priv->frag_info[i].frag_size; i++; } diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 76aa4d2..9df87ca 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -196,6 +196,7 @@ int mlx4_en_activate_tx_ring(struct mlx4_en_priv *priv, ring->last_nr_txbb = 1; memset(ring->tx_info, 0, ring->size * sizeof(struct mlx4_en_tx_info)); memset(ring->buf, 0, ring->buf_size); + ring->free_tx_desc = mlx4_en_free_tx_desc; ring->qp_state = MLX4_QP_STATE_RST; ring->doorbell_qpn = cpu_to_be32(ring->qp.qpn << 8); @@ -265,10 +266,10 @@ static void mlx4_en_stamp_wqe(struct mlx4_en_priv *priv, } -static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, - struct mlx4_en_tx_ring *ring, - int index, u8 owner, u64 timestamp, - int napi_mode) +u32 
mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode) { struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; struct mlx4_en_tx_desc *tx_desc = ring->buf + index * TXBB_SIZE; @@ -344,6 +345,27 @@ static u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, return tx_info->nr_txbb; } +u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode) +{ + struct mlx4_en_tx_info *tx_info = &ring->tx_info[index]; + struct mlx4_en_rx_alloc frame = { + .page = tx_info->page, + .dma = tx_info->map0_dma, + .page_offset = 0, + .page_size = PAGE_SIZE, + }; + + if (!mlx4_en_rx_recycle(ring->recycle_ring, &frame)) { + dma_unmap_page(priv->ddev, tx_info->map0_dma, + PAGE_SIZE, priv->frag_info[0].dma_dir); + put_page(tx_info->page); + } + + return tx_info->nr_txbb; +} int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) { @@ -362,7 +384,7 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) } while (ring->cons != ring->prod) { - ring->last_nr_txbb = mlx4_en_free_tx_desc(priv, ring, + ring->last_nr_txbb = ring->free_tx_desc(priv, ring, ring->cons & ring->size_mask, !!(ring->cons & ring->size), 0, 0 /* Non-NAPI caller */); @@ -444,7 +466,7 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, timestamp = mlx4_en_get_cqe_ts(cqe); /* free next descriptor */ - last_nr_txbb = mlx4_en_free_tx_desc( + last_nr_txbb = ring->free_tx_desc( priv, ring, ring_index, !!((ring_cons + txbbs_skipped) & ring->size), timestamp, napi_budget); @@ -476,6 +498,9 @@ static bool mlx4_en_process_tx_cq(struct net_device *dev, ACCESS_ONCE(ring->last_nr_txbb) = last_nr_txbb; ACCESS_ONCE(ring->cons) = ring_cons + txbbs_skipped; + if (ring->free_tx_desc == mlx4_en_recycle_tx_desc) + return done < budget; + netdev_tx_completed_queue(ring->tx_queue, packets, bytes); /* Wakeup Tx queue if this 
stopped, and ring is not full. @@ -631,8 +656,7 @@ static int get_real_size(const struct sk_buff *skb, static void build_inline_wqe(struct mlx4_en_tx_desc *tx_desc, const struct sk_buff *skb, const struct skb_shared_info *shinfo, - int real_size, u16 *vlan_tag, - int tx_ind, void *fragptr) + void *fragptr) { struct mlx4_wqe_inline_seg *inl = &tx_desc->inl; int spc = MLX4_INLINE_ALIGN - CTRL_SIZE - sizeof *inl; @@ -700,10 +724,66 @@ static void mlx4_bf_copy(void __iomem *dst, const void *src, __iowrite64_copy(dst, src, bytecnt / 8); } +void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring) +{ + wmb(); + /* Since there is no iowrite*_native() that writes the + * value as is, without byteswapping - using the one + * the doesn't do byteswapping in the relevant arch + * endianness. + */ +#if defined(__LITTLE_ENDIAN) + iowrite32( +#else + iowrite32be( +#endif + ring->doorbell_qpn, + ring->bf.uar->map + MLX4_SEND_DOORBELL); +} + +static void mlx4_en_tx_write_desc(struct mlx4_en_tx_ring *ring, + struct mlx4_en_tx_desc *tx_desc, + union mlx4_wqe_qpn_vlan qpn_vlan, + int desc_size, int bf_index, + __be32 op_own, bool bf_ok, + bool send_doorbell) +{ + tx_desc->ctrl.qpn_vlan = qpn_vlan; + + if (bf_ok) { + op_own |= htonl((bf_index & 0xffff) << 8); + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + + wmb(); + + mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl, + desc_size); + + wmb(); + + ring->bf.offset ^= ring->bf.buf_size; + } else { + /* Ensure new descriptor hits memory + * before setting ownership of this descriptor to HW + */ + dma_wmb(); + tx_desc->ctrl.owner_opcode = op_own; + if (send_doorbell) + mlx4_en_xmit_doorbell(ring); + else + ring->xmit_more++; + } +} + netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) { struct skb_shared_info *shinfo = skb_shinfo(skb); struct mlx4_en_priv *priv = netdev_priv(dev); + union mlx4_wqe_qpn_vlan 
qpn_vlan = {}; struct device *ddev = priv->ddev; struct mlx4_en_tx_ring *ring; struct mlx4_en_tx_desc *tx_desc; @@ -715,7 +795,6 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) int real_size; u32 index, bf_index; __be32 op_own; - u16 vlan_tag = 0; u16 vlan_proto = 0; int i_frag; int lso_header_size; @@ -725,6 +804,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) bool stop_queue; bool inline_ok; u32 ring_cons; + bool bf_ok; tx_ind = skb_get_queue_mapping(skb); ring = priv->tx_ring[tx_ind]; @@ -749,9 +829,17 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_drop; } + bf_ok = ring->bf_enabled; if (skb_vlan_tag_present(skb)) { - vlan_tag = skb_vlan_tag_get(skb); + qpn_vlan.vlan_tag = cpu_to_be16(skb_vlan_tag_get(skb)); vlan_proto = be16_to_cpu(skb->vlan_proto); + if (vlan_proto == ETH_P_8021AD) + qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; + else if (vlan_proto == ETH_P_8021Q) + qpn_vlan.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; + else + qpn_vlan.ins_vlan = 0; + bf_ok = false; } netdev_txq_bql_enqueue_prefetchw(ring->tx_queue); @@ -771,6 +859,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) else { tx_desc = (struct mlx4_en_tx_desc *) ring->bounce_buf; bounce = true; + bf_ok = false; } /* Save skb in tx_info ring */ @@ -907,8 +996,7 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, skb->len); if (tx_info->inl) - build_inline_wqe(tx_desc, skb, shinfo, real_size, &vlan_tag, - tx_ind, fragptr); + build_inline_wqe(tx_desc, skb, shinfo, fragptr); if (skb->encapsulation) { union { @@ -946,60 +1034,15 @@ netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev) real_size = (real_size / 16) & 0x3f; - if (ring->bf_enabled && desc_size <= MAX_BF && !bounce && - !skb_vlan_tag_present(skb) && send_doorbell) { - tx_desc->ctrl.bf_qpn = ring->doorbell_qpn | - cpu_to_be32(real_size); - - op_own |= 
htonl((bf_index & 0xffff) << 8); - /* Ensure new descriptor hits memory - * before setting ownership of this descriptor to HW - */ - dma_wmb(); - tx_desc->ctrl.owner_opcode = op_own; - - wmb(); + bf_ok &= desc_size <= MAX_BF && send_doorbell; - mlx4_bf_copy(ring->bf.reg + ring->bf.offset, &tx_desc->ctrl, - desc_size); - - wmb(); - - ring->bf.offset ^= ring->bf.buf_size; - } else { - tx_desc->ctrl.vlan_tag = cpu_to_be16(vlan_tag); - if (vlan_proto == ETH_P_8021AD) - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_SVLAN; - else if (vlan_proto == ETH_P_8021Q) - tx_desc->ctrl.ins_vlan = MLX4_WQE_CTRL_INS_CVLAN; - else - tx_desc->ctrl.ins_vlan = 0; - - tx_desc->ctrl.fence_size = real_size; + if (bf_ok) + qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); + else + qpn_vlan.fence_size = real_size; - /* Ensure new descriptor hits memory - * before setting ownership of this descriptor to HW - */ - dma_wmb(); - tx_desc->ctrl.owner_opcode = op_own; - if (send_doorbell) { - wmb(); - /* Since there is no iowrite*_native() that writes the - * value as is, without byteswapping - using the one - * the doesn't do byteswapping in the relevant arch - * endianness. 
- */ -#if defined(__LITTLE_ENDIAN) - iowrite32( -#else - iowrite32be( -#endif - ring->doorbell_qpn, - ring->bf.uar->map + MLX4_SEND_DOORBELL); - } else { - ring->xmit_more++; - } - } + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, desc_size, bf_index, + op_own, bf_ok, send_doorbell); if (unlikely(stop_queue)) { /* If queue was emptied after the if (stop_queue) , and before @@ -1034,3 +1077,106 @@ tx_drop: return NETDEV_TX_OK; } +netdev_tx_t mlx4_en_xmit_frame(struct mlx4_en_rx_alloc *frame, + struct net_device *dev, unsigned int length, + int tx_ind, int *doorbell_pending) +{ + struct mlx4_en_priv *priv = netdev_priv(dev); + union mlx4_wqe_qpn_vlan qpn_vlan = {}; + struct mlx4_en_tx_ring *ring; + struct mlx4_en_tx_desc *tx_desc; + struct mlx4_wqe_data_seg *data; + struct mlx4_en_tx_info *tx_info; + int index, bf_index; + bool send_doorbell; + int nr_txbb = 1; + bool stop_queue; + dma_addr_t dma; + int real_size; + __be32 op_own; + u32 ring_cons; + bool bf_ok; + + BUILD_BUG_ON_MSG(ALIGN(CTRL_SIZE + DS_SIZE, TXBB_SIZE) != TXBB_SIZE, + "mlx4_en_xmit_frame requires minimum size tx desc"); + + ring = priv->tx_ring[tx_ind]; + + if (!priv->port_up) + goto tx_drop; + + if (mlx4_en_is_tx_ring_full(ring)) + goto tx_drop; + + /* fetch ring->cons far ahead before needing it to avoid stall */ + ring_cons = READ_ONCE(ring->cons); + + index = ring->prod & ring->size_mask; + tx_info = &ring->tx_info[index]; + + bf_ok = ring->bf_enabled; + + /* Track current inflight packets for performance analysis */ + AVG_PERF_COUNTER(priv->pstats.inflight_avg, + (u32)(ring->prod - ring_cons - 1)); + + bf_index = ring->prod; + tx_desc = ring->buf + index * TXBB_SIZE; + data = &tx_desc->data; + + dma = frame->dma; + + tx_info->page = frame->page; + frame->page = NULL; + tx_info->map0_dma = dma; + tx_info->map0_byte_count = length; + tx_info->nr_txbb = nr_txbb; + tx_info->nr_bytes = max_t(unsigned int, length, ETH_ZLEN); + tx_info->data_offset = (void *)data - (void *)tx_desc; + 
tx_info->ts_requested = 0; + tx_info->nr_maps = 1; + tx_info->linear = 1; + tx_info->inl = 0; + + dma_sync_single_for_device(priv->ddev, dma, length, PCI_DMA_TODEVICE); + + data->addr = cpu_to_be64(dma); + data->lkey = ring->mr_key; + dma_wmb(); + data->byte_count = cpu_to_be32(length); + + /* tx completion can avoid cache line miss for common cases */ + tx_desc->ctrl.srcrb_flags = priv->ctrl_flags; + + op_own = cpu_to_be32(MLX4_OPCODE_SEND) | + ((ring->prod & ring->size) ? + cpu_to_be32(MLX4_EN_BIT_DESC_OWN) : 0); + + ring->packets++; + ring->bytes += tx_info->nr_bytes; + AVG_PERF_COUNTER(priv->pstats.tx_pktsz_avg, length); + + ring->prod += nr_txbb; + + stop_queue = mlx4_en_is_tx_ring_full(ring); + send_doorbell = stop_queue || + *doorbell_pending > MLX4_EN_DOORBELL_BUDGET; + bf_ok &= send_doorbell; + + real_size = ((CTRL_SIZE + nr_txbb * DS_SIZE) / 16) & 0x3f; + + if (bf_ok) + qpn_vlan.bf_qpn = ring->doorbell_qpn | cpu_to_be32(real_size); + else + qpn_vlan.fence_size = real_size; + + mlx4_en_tx_write_desc(ring, tx_desc, qpn_vlan, TXBB_SIZE, bf_index, + op_own, bf_ok, send_doorbell); + *doorbell_pending = send_doorbell ? 0 : *doorbell_pending + 1; + + return NETDEV_TX_OK; + +tx_drop: + ring->tx_dropped++; + return NETDEV_TX_BUSY; +} diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index d39bf59..29c81d2 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -132,6 +132,7 @@ enum { MLX4_EN_NUM_UP) #define MLX4_EN_DEFAULT_TX_WORK 256 +#define MLX4_EN_DOORBELL_BUDGET 8 /* Target number of packets to coalesce with interrupt moderation */ #define MLX4_EN_RX_COAL_TARGET 44 @@ -164,6 +165,10 @@ enum { #define MLX4_LOOPBACK_TEST_PAYLOAD (HEADER_COPY_SIZE - ETH_HLEN) #define MLX4_EN_MIN_MTU 46 +/* VLAN_HLEN is added twice,to support skb vlan tagged with multiple + * headers. (For example: ETH_P_8021Q and ETH_P_8021AD). 
+ */ +#define MLX4_EN_EFF_MTU(mtu) ((mtu) + ETH_HLEN + (2 * VLAN_HLEN)) #define ETH_BCAST 0xffffffffffffULL #define MLX4_EN_LOOPBACK_RETRIES 5 @@ -215,7 +220,10 @@ enum cq_type { struct mlx4_en_tx_info { - struct sk_buff *skb; + union { + struct sk_buff *skb; + struct page *page; + }; dma_addr_t map0_dma; u32 map0_byte_count; u32 nr_txbb; @@ -255,6 +263,14 @@ struct mlx4_en_rx_alloc { u32 page_size; }; +#define MLX4_EN_CACHE_SIZE (2 * NAPI_POLL_WEIGHT) +struct mlx4_en_page_cache { + u32 index; + struct mlx4_en_rx_alloc buf[MLX4_EN_CACHE_SIZE]; +}; + +struct mlx4_en_priv; + struct mlx4_en_tx_ring { /* cache line used and dirtied in tx completion * (mlx4_en_free_tx_buf()) @@ -288,6 +304,11 @@ struct mlx4_en_tx_ring { __be32 mr_key; void *buf; struct mlx4_en_tx_info *tx_info; + struct mlx4_en_rx_ring *recycle_ring; + u32 (*free_tx_desc)(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, + u64 timestamp, int napi_mode); u8 *bounce_buf; struct mlx4_qp_context context; int qpn; @@ -319,6 +340,8 @@ struct mlx4_en_rx_ring { u8 fcs_del; void *buf; void *rx_info; + struct bpf_prog *xdp_prog; + struct mlx4_en_page_cache page_cache; unsigned long bytes; unsigned long packets; unsigned long csum_ok; @@ -438,7 +461,9 @@ struct mlx4_en_mc_list { struct mlx4_en_frag_info { u16 frag_size; u16 frag_prefix_size; - u16 frag_stride; + u32 frag_stride; + enum dma_data_direction dma_dir; + int order; }; #ifdef CONFIG_MLX4_EN_DCB @@ -558,6 +583,7 @@ struct mlx4_en_priv { struct mlx4_en_frag_info frag_info[MLX4_EN_MAX_RX_FRAGS]; u16 num_frags; u16 log_rx_info; + int xdp_ring_num; struct mlx4_en_tx_ring **tx_ring; struct mlx4_en_rx_ring *rx_ring[MAX_RX_RINGS]; @@ -663,6 +689,12 @@ void mlx4_en_tx_irq(struct mlx4_cq *mcq); u16 mlx4_en_select_queue(struct net_device *dev, struct sk_buff *skb, void *accel_priv, select_queue_fallback_t fallback); netdev_tx_t mlx4_en_xmit(struct sk_buff *skb, struct net_device *dev); +netdev_tx_t mlx4_en_xmit_frame(struct 
mlx4_en_rx_alloc *frame, + struct net_device *dev, unsigned int length, + int tx_ind, int *doorbell_pending); +void mlx4_en_xmit_doorbell(struct mlx4_en_tx_ring *ring); +bool mlx4_en_rx_recycle(struct mlx4_en_rx_ring *ring, + struct mlx4_en_rx_alloc *frame); int mlx4_en_create_tx_ring(struct mlx4_en_priv *priv, struct mlx4_en_tx_ring **pring, @@ -691,6 +723,14 @@ int mlx4_en_process_rx_cq(struct net_device *dev, int budget); int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget); int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget); +u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode); +u32 mlx4_en_recycle_tx_desc(struct mlx4_en_priv *priv, + struct mlx4_en_tx_ring *ring, + int index, u8 owner, u64 timestamp, + int napi_mode); void mlx4_en_fill_qp_context(struct mlx4_en_priv *priv, int size, int stride, int is_tx, int rss, int qpn, int cqn, int user_prio, struct mlx4_qp_context *context); diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index 5349284..8377d02 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1908,7 +1908,6 @@ static int ravb_probe(struct platform_device *pdev) /* The Ether-specific entries in the device structure. 
*/ ndev->base_addr = res->start; - ndev->dma = -1; chip_id = (enum ravb_chip_id)of_device_get_match_data(&pdev->dev); diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 04cd39f..7bd910c 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -2996,7 +2996,6 @@ static int sh_eth_drv_probe(struct platform_device *pdev) if (devno < 0) devno = 0; - ndev->dma = -1; ret = platform_get_irq(pdev, 0); if (ret < 0) goto out_release; diff --git a/drivers/net/ethernet/synopsys/dwc_eth_qos.c b/drivers/net/ethernet/synopsys/dwc_eth_qos.c index fc1ea80..9f159a7 100644 --- a/drivers/net/ethernet/synopsys/dwc_eth_qos.c +++ b/drivers/net/ethernet/synopsys/dwc_eth_qos.c @@ -2910,7 +2910,8 @@ static int dwceqos_probe(struct platform_device *pdev) (unsigned long)ndev); tasklet_disable(&lp->tx_bdreclaim_tasklet); - lp->txtimeout_handler_wq = create_singlethread_workqueue(DRIVER_NAME); + lp->txtimeout_handler_wq = alloc_workqueue(DRIVER_NAME, + WQ_MEM_RECLAIM, 0); INIT_WORK(&lp->txtimeout_reinit, dwceqos_reinit_for_txtimeout); platform_set_drvdata(pdev, ndev); diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c index f86497c..29f381b 100644 --- a/drivers/net/ethernet/ti/cpmac.c +++ b/drivers/net/ethernet/ti/cpmac.c @@ -1006,8 +1006,10 @@ fail_desc: kfree_skb(priv->rx_head[i].skb); } } + dma_free_coherent(&dev->dev, sizeof(struct cpmac_desc) * size, + priv->desc_ring, priv->dma_ring); + fail_alloc: - kfree(priv->desc_ring); iounmap(priv->regs); fail_remap: diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index 9204d19..a38c0da 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -536,7 +536,7 @@ static void macvtap_sock_destruct(struct sock *sk) struct sk_buff *skb; while ((skb = skb_array_consume(&q->skb_array)) != NULL) - kfree(skb); + kfree_skb(skb); } static int macvtap_open(struct inode *inode, struct file *file) diff --git 
a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index ec2c1ee..c2dcf02 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -138,6 +138,21 @@ #define MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII 0x1 /* SGMII to copper */ #define MII_88E1510_GEN_CTRL_REG_1_RESET 0x8000 /* Soft reset */ +#define LPA_FIBER_1000HALF 0x40 +#define LPA_FIBER_1000FULL 0x20 + +#define LPA_PAUSE_FIBER 0x180 +#define LPA_PAUSE_ASYM_FIBER 0x100 + +#define ADVERTISE_FIBER_1000HALF 0x40 +#define ADVERTISE_FIBER_1000FULL 0x20 + +#define ADVERTISE_PAUSE_FIBER 0x180 +#define ADVERTISE_PAUSE_ASYM_FIBER 0x100 + +#define REGISTER_LINK_STATUS 0x400 +#define NB_FIBER_STATS 1 + MODULE_DESCRIPTION("Marvell PHY driver"); MODULE_AUTHOR("Andy Fleming"); MODULE_LICENSE("GPL"); @@ -150,8 +165,9 @@ struct marvell_hw_stat { }; static struct marvell_hw_stat marvell_hw_stats[] = { - { "phy_receive_errors", 0, 21, 16}, + { "phy_receive_errors_copper", 0, 21, 16}, { "phy_idle_errors", 0, 10, 8 }, + { "phy_receive_errors_fiber", 1, 21, 16}, }; struct marvell_priv { @@ -477,15 +493,122 @@ static int m88e1318_config_aneg(struct phy_device *phydev) return m88e1121_config_aneg(phydev); } +/** + * ethtool_adv_to_fiber_adv_t + * @ethadv: the ethtool advertisement settings + * + * A small helper function that translates ethtool advertisement + * settings to phy autonegotiation advertisements for the + * MII_ADV register for fiber link. 
+ */ +static inline u32 ethtool_adv_to_fiber_adv_t(u32 ethadv) +{ + u32 result = 0; + + if (ethadv & ADVERTISED_1000baseT_Half) + result |= ADVERTISE_FIBER_1000HALF; + if (ethadv & ADVERTISED_1000baseT_Full) + result |= ADVERTISE_FIBER_1000FULL; + + if ((ethadv & ADVERTISED_Asym_Pause) && (ethadv & ADVERTISED_Pause)) + result |= LPA_PAUSE_ASYM_FIBER; + else if (ethadv & ADVERTISED_Pause) + result |= (ADVERTISE_PAUSE_FIBER + & (~ADVERTISE_PAUSE_ASYM_FIBER)); + + return result; +} + +/** + * marvell_config_aneg_fiber - restart auto-negotiation or write BMCR + * @phydev: target phy_device struct + * + * Description: If auto-negotiation is enabled, we configure the + * advertising, and then restart auto-negotiation. If it is not + * enabled, then we write the BMCR. Adapted for fiber link in + * some Marvell's devices. + */ +static int marvell_config_aneg_fiber(struct phy_device *phydev) +{ + int changed = 0; + int err; + int adv, oldadv; + u32 advertise; + + if (phydev->autoneg != AUTONEG_ENABLE) + return genphy_setup_forced(phydev); + + /* Only allow advertising what this PHY supports */ + phydev->advertising &= phydev->supported; + advertise = phydev->advertising; + + /* Setup fiber advertisement */ + adv = phy_read(phydev, MII_ADVERTISE); + if (adv < 0) + return adv; + + oldadv = adv; + adv &= ~(ADVERTISE_FIBER_1000HALF | ADVERTISE_FIBER_1000FULL + | LPA_PAUSE_FIBER); + adv |= ethtool_adv_to_fiber_adv_t(advertise); + + if (adv != oldadv) { + err = phy_write(phydev, MII_ADVERTISE, adv); + if (err < 0) + return err; + + changed = 1; + } + + if (changed == 0) { + /* Advertisement hasn't changed, but maybe aneg was never on to + * begin with? Or maybe phy was isolated? + */ + int ctl = phy_read(phydev, MII_BMCR); + + if (ctl < 0) + return ctl; + + if (!(ctl & BMCR_ANENABLE) || (ctl & BMCR_ISOLATE)) + changed = 1; /* do restart aneg */ + } + + /* Only restart aneg if we are advertising something different + * than we were before.
+ */ + if (changed > 0) + changed = genphy_restart_aneg(phydev); + + return changed; +} + static int m88e1510_config_aneg(struct phy_device *phydev) { int err; + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_COPPER); + if (err < 0) + goto error; + + /* Configure the copper link first */ err = m88e1318_config_aneg(phydev); if (err < 0) - return err; + goto error; - return 0; + /* Then the fiber link */ + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_FIBER); + if (err < 0) + goto error; + + err = marvell_config_aneg_fiber(phydev); + if (err < 0) + goto error; + + return phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_COPPER); + +error: + phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_COPPER); + return err; } static int marvell_config_init(struct phy_device *phydev) @@ -890,26 +1013,79 @@ static int m88e1145_config_init(struct phy_device *phydev) return 0; } -/* marvell_read_status +/** + * fiber_lpa_to_ethtool_lpa_t + * @lpa: value of the MII_LPA register for fiber link + * + * A small helper function that translates MII_LPA + * bits to ethtool LP advertisement settings. + */ +static u32 fiber_lpa_to_ethtool_lpa_t(u32 lpa) +{ + u32 result = 0; + + if (lpa & LPA_FIBER_1000HALF) + result |= ADVERTISED_1000baseT_Half; + if (lpa & LPA_FIBER_1000FULL) + result |= ADVERTISED_1000baseT_Full; + + return result; +} + +/** + * marvell_update_link - update link status in real time in @phydev + * @phydev: target phy_device struct + * + * Description: Update the value in phydev->link to reflect the + * current link value. 
+ */ +static int marvell_update_link(struct phy_device *phydev, int fiber) +{ + int status; + + /* Use the generic register for copper link, or specific + * register for fiber case */ + if (fiber) { + status = phy_read(phydev, MII_M1011_PHY_STATUS); + if (status < 0) + return status; + + if ((status & REGISTER_LINK_STATUS) == 0) + phydev->link = 0; + else + phydev->link = 1; + } else { + return genphy_update_link(phydev); + } + + return 0; +} + +/* marvell_read_status_page * - * Generic status code does not detect Fiber correctly! * Description: * Check the link, then figure out the current state * by comparing what we advertise with what the link partner * advertises. Start by checking the gigabit possibilities, * then move on to 10/100. */ -static int marvell_read_status(struct phy_device *phydev) +static int marvell_read_status_page(struct phy_device *phydev, int page) { int adv; int err; int lpa; int lpagb; int status = 0; + int fiber; - /* Update the link, but return if there + /* Detect and update the link, but return if there * was an error */ - err = genphy_update_link(phydev); + if (page == MII_M1111_FIBER) + fiber = 1; + else + fiber = 0; + + err = marvell_update_link(phydev, fiber); if (err) return err; @@ -930,9 +1106,6 @@ static int marvell_read_status(struct phy_device *phydev) if (adv < 0) return adv; - phydev->lp_advertising = mii_stat1000_to_ethtool_lpa_t(lpagb) | - mii_lpa_to_ethtool_lpa_t(lpa); - lpa &= adv; if (status & MII_M1011_PHY_STATUS_FULLDUPLEX) @@ -957,9 +1130,30 @@ static int marvell_read_status(struct phy_device *phydev) break; } - if (phydev->duplex == DUPLEX_FULL) { - phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; - phydev->asym_pause = lpa & LPA_PAUSE_ASYM ? 1 : 0; + if (!fiber) { + phydev->lp_advertising = mii_stat1000_to_ethtool_lpa_t(lpagb) | + mii_lpa_to_ethtool_lpa_t(lpa); + + if (phydev->duplex == DUPLEX_FULL) { + phydev->pause = lpa & LPA_PAUSE_CAP ? 1 : 0; + phydev->asym_pause = lpa & LPA_PAUSE_ASYM ? 
1 : 0; + } + } else { + /* The fiber link is only 1000M capable */ + phydev->lp_advertising = fiber_lpa_to_ethtool_lpa_t(lpa); + + if (phydev->duplex == DUPLEX_FULL) { + if (!(lpa & LPA_PAUSE_FIBER)) { + phydev->pause = 0; + phydev->asym_pause = 0; + } else if ((lpa & LPA_PAUSE_ASYM_FIBER)) { + phydev->pause = 1; + phydev->asym_pause = 1; + } else { + phydev->pause = 1; + phydev->asym_pause = 0; + } + } } } else { int bmcr = phy_read(phydev, MII_BMCR); @@ -986,6 +1180,119 @@ static int marvell_read_status(struct phy_device *phydev) return 0; } +/* marvell_read_status + * + * Some Marvell's phys have two modes: fiber and copper. + * Both need status checked. + * Description: + * First, check the fiber link and status. + * If the fiber link is down, check the copper link and status which + * will be the default value if both link are down. + */ +static int marvell_read_status(struct phy_device *phydev) +{ + int err; + + /* Check the fiber mode first */ + if (phydev->supported & SUPPORTED_FIBRE) { + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_FIBER); + if (err < 0) + goto error; + + err = marvell_read_status_page(phydev, MII_M1111_FIBER); + if (err < 0) + goto error; + + /* If the fiber link is up, it is the selected and used link. + * In this case, we need to stay in the fiber page. + * Please to be careful about that, avoid to restore Copper page + * in other functions which could break the behaviour + * for some fiber phy like 88E1512. + * */ + if (phydev->link) + return 0; + + /* If fiber link is down, check and save copper mode state */ + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_COPPER); + if (err < 0) + goto error; + } + + return marvell_read_status_page(phydev, MII_M1111_COPPER); + +error: + phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_COPPER); + return err; +} + +/* marvell_suspend + * + * Some Marvell's phys have two modes: fiber and copper. 
+ * Both need to be suspended + */ +static int marvell_suspend(struct phy_device *phydev) +{ + int err; + + /* Suspend the fiber mode first */ + if (phydev->supported & SUPPORTED_FIBRE) { + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_FIBER); + if (err < 0) + goto error; + + /* With the page set, use the generic suspend */ + err = genphy_suspend(phydev); + if (err < 0) + goto error; + + /* Then, the copper link */ + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_COPPER); + if (err < 0) + goto error; + } + + /* With the page set, use the generic suspend */ + return genphy_suspend(phydev); + +error: + phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_COPPER); + return err; +} + +/* marvell_resume + * + * Some Marvell's phys have two modes: fiber and copper. + * Both need to be resumed + */ +static int marvell_resume(struct phy_device *phydev) +{ + int err; + + /* Resume the fiber mode first */ + if (phydev->supported & SUPPORTED_FIBRE) { + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_FIBER); + if (err < 0) + goto error; + + /* With the page set, use the generic resume */ + err = genphy_resume(phydev); + if (err < 0) + goto error; + + /* Then, the copper link */ + err = phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_COPPER); + if (err < 0) + goto error; + } + + /* With the page set, use the generic resume */ + return genphy_resume(phydev); + +error: + phy_write(phydev, MII_MARVELL_PHY_PAGE, MII_M1111_COPPER); + return err; +} + static int marvell_aneg_done(struct phy_device *phydev) { int retval = phy_read(phydev, MII_M1011_PHY_STATUS); @@ -1107,7 +1414,10 @@ static int m88e1318_set_wol(struct phy_device *phydev, struct ethtool_wolinfo *w static int marvell_get_sset_count(struct phy_device *phydev) { - return ARRAY_SIZE(marvell_hw_stats); + if (phydev->supported & SUPPORTED_FIBRE) + return ARRAY_SIZE(marvell_hw_stats); + else + return ARRAY_SIZE(marvell_hw_stats) - NB_FIBER_STATS; } static void marvell_get_strings(struct
phy_device *phydev, u8 *data) @@ -1361,7 +1671,7 @@ static struct phy_driver marvell_drivers[] = { .phy_id = MARVELL_PHY_ID_88E1510, .phy_id_mask = MARVELL_PHY_ID_MASK, .name = "Marvell 88E1510", - .features = PHY_GBIT_FEATURES, + .features = PHY_GBIT_FEATURES | SUPPORTED_FIBRE, .flags = PHY_HAS_INTERRUPT, .probe = marvell_probe, .config_init = &m88e1510_config_init, @@ -1370,8 +1680,8 @@ static struct phy_driver marvell_drivers[] = { .ack_interrupt = &marvell_ack_interrupt, .config_intr = &marvell_config_intr, .did_interrupt = &m88e1121_did_interrupt, - .resume = &genphy_resume, - .suspend = &genphy_suspend, + .resume = &marvell_resume, + .suspend = &marvell_suspend, .get_sset_count = marvell_get_sset_count, .get_strings = marvell_get_strings, .get_stats = marvell_get_stats, diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1dd08d4..1b5f531 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -144,8 +144,10 @@ struct virtnet_info { /* Control VQ buffers: protected by the rtnl lock */ struct virtio_net_ctrl_hdr ctrl_hdr; virtio_net_ctrl_ack ctrl_status; + struct virtio_net_ctrl_mq ctrl_mq; u8 ctrl_promisc; u8 ctrl_allmulti; + u16 ctrl_vid; /* Ethtool settings */ u8 duplex; @@ -1058,14 +1060,13 @@ static void virtnet_ack_link_announce(struct virtnet_info *vi) static int virtnet_set_queues(struct virtnet_info *vi, u16 queue_pairs) { struct scatterlist sg; - struct virtio_net_ctrl_mq s; struct net_device *dev = vi->dev; if (!vi->has_cvq || !virtio_has_feature(vi->vdev, VIRTIO_NET_F_MQ)) return 0; - s.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); - sg_init_one(&sg, &s, sizeof(s)); + vi->ctrl_mq.virtqueue_pairs = cpu_to_virtio16(vi->vdev, queue_pairs); + sg_init_one(&sg, &vi->ctrl_mq, sizeof(vi->ctrl_mq)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_MQ, VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &sg)) { @@ -1172,7 +1173,8 @@ static int virtnet_vlan_rx_add_vid(struct net_device *dev, struct virtnet_info *vi = netdev_priv(dev); 
struct scatterlist sg; - sg_init_one(&sg, &vid, sizeof(vid)); + vi->ctrl_vid = vid; + sg_init_one(&sg, &vi->ctrl_vid, sizeof(vi->ctrl_vid)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, VIRTIO_NET_CTRL_VLAN_ADD, &sg)) @@ -1186,7 +1188,8 @@ static int virtnet_vlan_rx_kill_vid(struct net_device *dev, struct virtnet_info *vi = netdev_priv(dev); struct scatterlist sg; - sg_init_one(&sg, &vid, sizeof(vid)); + vi->ctrl_vid = vid; + sg_init_one(&sg, &vi->ctrl_vid, sizeof(vi->ctrl_vid)); if (!virtnet_send_command(vi, VIRTIO_NET_CTRL_VLAN, VIRTIO_NET_CTRL_VLAN_DEL, &sg)) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c13e92b..36da074 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -224,6 +224,7 @@ void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); +struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i); struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); @@ -288,6 +289,10 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, { return ERR_PTR(-EOPNOTSUPP); } +static inline struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) +{ + return ERR_PTR(-EOPNOTSUPP); +} static inline void bpf_prog_put(struct bpf_prog *prog) { diff --git a/include/linux/filter.h b/include/linux/filter.h index 6fc31ef..15d816a 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -368,6 +368,11 @@ struct bpf_skb_data_end { void *data_end; }; +struct xdp_buff { + void *data; + void *data_end; +}; + /* compute the linear packet data range [data, data_end) which * will be accessed by cls_bpf and act_bpf programs */ @@ -429,6 +434,18 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, return BPF_PROG_RUN(prog, skb); } +static inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog, + struct xdp_buff *xdp) +{ + u32 ret; + + rcu_read_lock(); + ret = 
BPF_PROG_RUN(prog, (void *)xdp); + rcu_read_unlock(); + + return ret; +} + static inline unsigned int bpf_prog_size(unsigned int proglen) { return max(sizeof(struct bpf_prog), @@ -509,6 +526,7 @@ bool bpf_helper_changes_skb_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, const struct bpf_insn *patch, u32 len); +void bpf_warn_invalid_xdp_action(u32 act); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index 587cdf9..deaa221 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -291,16 +291,18 @@ enum { MLX4_WQE_CTRL_FORCE_LOOPBACK = 1 << 0, }; +union mlx4_wqe_qpn_vlan { + struct { + __be16 vlan_tag; + u8 ins_vlan; + u8 fence_size; + }; + __be32 bf_qpn; +}; + struct mlx4_wqe_ctrl_seg { __be32 owner_opcode; - union { - struct { - __be16 vlan_tag; - u8 ins_vlan; - u8 fence_size; - }; - __be32 bf_qpn; - }; + union mlx4_wqe_qpn_vlan qpn_vlan; /* * High 24 bits are SRC remote buffer; low 8 bits are flags: * [7] SO (strong ordering) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 49736a3..fab9a1c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -63,6 +63,7 @@ struct wpan_dev; struct mpls_dev; /* UDP Tunnel offloads */ struct udp_tunnel_info; +struct bpf_prog; void netdev_set_default_ethtool_ops(struct net_device *dev, const struct ethtool_ops *ops); @@ -799,6 +800,33 @@ struct tc_to_netdev { }; }; +/* These structures hold the attributes of xdp state that are being passed + * to the netdevice through the xdp op. + */ +enum xdp_netdev_command { + /* Set or clear a bpf program used in the earliest stages of packet + * rx. The prog will have been loaded as BPF_PROG_TYPE_XDP. The callee + * is responsible for calling bpf_prog_put on any old progs that are + * stored. 
In case of error, the callee need not release the new prog + * reference, but on success it takes ownership and must bpf_prog_put + * when it is no longer used. + */ + XDP_SETUP_PROG, + /* Check if a bpf program is set on the device. The callee should + * return true if a program is currently attached and running. + */ + XDP_QUERY_PROG, +}; + +struct netdev_xdp { + enum xdp_netdev_command command; + union { + /* XDP_SETUP_PROG */ + struct bpf_prog *prog; + /* XDP_QUERY_PROG */ + bool prog_attached; + }; +}; /* * This structure defines the management hooks for network devices. @@ -1087,6 +1115,9 @@ struct tc_to_netdev { * appropriate rx headroom value allows avoiding skb head copy on * forward. Setting a negative value resets the rx headroom to the * default value. + * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); + * This function is used to set or query state related to XDP on the + * netdevice. See definition of enum xdp_netdev_command for details. * */ struct net_device_ops { @@ -1271,6 +1302,8 @@ struct net_device_ops { struct sk_buff *skb); void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); + int (*ndo_xdp)(struct net_device *dev, + struct netdev_xdp *xdp); }; /** @@ -3257,6 +3290,7 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); +int dev_change_xdp_fd(struct net_device *dev, int fd); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); diff --git a/include/net/dsa.h b/include/net/dsa.h index 52ab18b..2217a3f 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -141,6 +141,7 @@ struct dsa_switch_tree { struct dsa_port { struct net_device *netdev; struct device_node *dn; + unsigned int ageing_time; }; struct dsa_switch { @@ 
-329,6 +330,7 @@ struct dsa_switch_driver { /* * Bridge integration */ + int (*set_ageing_time)(struct dsa_switch *ds, unsigned int msecs); int (*port_bridge_join)(struct dsa_switch *ds, int port, struct net_device *bridge); void (*port_bridge_leave)(struct dsa_switch *ds, int port); diff --git a/include/net/ip.h b/include/net/ip.h index 08f36cd..9742b92 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -47,6 +47,7 @@ struct inet_skb_parm { #define IPSKB_REROUTED BIT(4) #define IPSKB_DOREDIRECT BIT(5) #define IPSKB_FRAG_PMTU BIT(6) +#define IPSKB_FRAG_SEGS BIT(7) u16 frag_max_size; }; diff --git a/include/net/ncsi.h b/include/net/ncsi.h new file mode 100644 index 0000000..1dbf42f --- /dev/null +++ b/include/net/ncsi.h @@ -0,0 +1,52 @@ +#ifndef __NET_NCSI_H +#define __NET_NCSI_H + +/* + * The NCSI device states seen from external. More NCSI device states are + * only visible internally (in net/ncsi/internal.h). When the NCSI device + * is registered, it's in ncsi_dev_state_registered state. The state + * ncsi_dev_state_start is used to drive to choose active package and + * channel. After that, its state is changed to ncsi_dev_state_functional. + * + * The state ncsi_dev_state_stop helps to shut down the currently active + * package and channel while ncsi_dev_state_config helps to reconfigure + * them. 
+ */ +enum { + ncsi_dev_state_registered = 0x0000, + ncsi_dev_state_functional = 0x0100, + ncsi_dev_state_probe = 0x0200, + ncsi_dev_state_config = 0x0300, + ncsi_dev_state_suspend = 0x0400, +}; + +struct ncsi_dev { + int state; + int link_up; + struct net_device *dev; + void (*handler)(struct ncsi_dev *ndev); +}; + +#ifdef CONFIG_NET_NCSI +struct ncsi_dev *ncsi_register_dev(struct net_device *dev, + void (*notifier)(struct ncsi_dev *nd)); +int ncsi_start_dev(struct ncsi_dev *nd); +void ncsi_unregister_dev(struct ncsi_dev *nd); +#else /* !CONFIG_NET_NCSI */ +static inline struct ncsi_dev *ncsi_register_dev(struct net_device *dev, + void (*notifier)(struct ncsi_dev *nd)) +{ + return NULL; +} + +static inline int ncsi_start_dev(struct ncsi_dev *nd) +{ + return -ENOTTY; +} + +static inline void ncsi_unregister_dev(struct ncsi_dev *nd) +{ +} +#endif /* CONFIG_NET_NCSI */ + +#endif /* __NET_NCSI_H */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c4d9224..2b7076f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -94,6 +94,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SCHED_CLS, BPF_PROG_TYPE_SCHED_ACT, BPF_PROG_TYPE_TRACEPOINT, + BPF_PROG_TYPE_XDP, }; #define BPF_PSEUDO_MAP_FD 1 @@ -439,4 +440,24 @@ struct bpf_tunnel_key { __u32 tunnel_label; }; +/* User return codes for XDP prog type. + * A valid XDP program must return one of these defined values. All other + * return codes are reserved for future use. Unknown return codes will result + * in packet drop. 
+ */ +enum xdp_action { + XDP_ABORTED = 0, + XDP_DROP, + XDP_PASS, + XDP_TX, +}; + +/* user accessible metadata for XDP packet hook + * new fields must be added to the end of this structure + */ +struct xdp_md { + __u32 data; + __u32 data_end; +}; + #endif /* _UAPI__LINUX_BPF_H__ */ diff --git a/include/uapi/linux/elf-em.h b/include/uapi/linux/elf-em.h index c3fdfe7..cb5d1a5 100644 --- a/include/uapi/linux/elf-em.h +++ b/include/uapi/linux/elf-em.h @@ -40,6 +40,7 @@ #define EM_TILEPRO 188 /* Tilera TILEPro */ #define EM_MICROBLAZE 189 /* Xilinx MicroBlaze */ #define EM_TILEGX 191 /* Tilera TILE-Gx */ +#define EM_BPF 247 /* Linux BPF - in-kernel virtual machine */ #define EM_FRV 0x5441 /* Fujitsu FR-V */ #define EM_AVR32 0x18ad /* Atmel AVR32 */ diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index cec849a..117d02e 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -87,6 +87,7 @@ #define ETH_P_8021AH 0x88E7 /* 802.1ah Backbone Service Tag */ #define ETH_P_MVRP 0x88F5 /* 802.1Q MVRP */ #define ETH_P_1588 0x88F7 /* IEEE 1588 Timesync */ +#define ETH_P_NCSI 0x88F8 /* NCSI protocol */ #define ETH_P_PRP 0x88FB /* IEC 62439-3 PRP/HSRv0 */ #define ETH_P_FCOE 0x8906 /* Fibre Channel over Ethernet */ #define ETH_P_TDLS 0x890D /* TDLS */ diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 4285ac3..a1b5202 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -156,6 +156,7 @@ enum { IFLA_GSO_MAX_SEGS, IFLA_GSO_MAX_SIZE, IFLA_PAD, + IFLA_XDP, __IFLA_MAX }; @@ -843,4 +844,15 @@ enum { }; #define LINK_XSTATS_TYPE_MAX (__LINK_XSTATS_TYPE_MAX - 1) +/* XDP section */ + +enum { + IFLA_XDP_UNSPEC, + IFLA_XDP_FD, + IFLA_XDP_ATTACHED, + __IFLA_XDP_MAX, +}; + +#define IFLA_XDP_MAX (__IFLA_XDP_MAX - 1) + #endif /* _UAPI_LINUX_IF_LINK_H */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 96d938a..228f962 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ 
-670,14 +670,20 @@ static struct bpf_prog *____bpf_prog_get(struct fd f) return f.file->private_data; } -struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) +struct bpf_prog *bpf_prog_add(struct bpf_prog *prog, int i) { - if (atomic_inc_return(&prog->aux->refcnt) > BPF_MAX_REFCNT) { - atomic_dec(&prog->aux->refcnt); + if (atomic_add_return(i, &prog->aux->refcnt) > BPF_MAX_REFCNT) { + atomic_sub(i, &prog->aux->refcnt); return ERR_PTR(-EBUSY); } return prog; } +EXPORT_SYMBOL_GPL(bpf_prog_add); + +struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog) +{ + return bpf_prog_add(prog, 1); +} static struct bpf_prog *__bpf_prog_get(u32 ufd, enum bpf_prog_type *type) { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index e206c21..f72f23b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -653,6 +653,16 @@ static int check_map_access(struct verifier_env *env, u32 regno, int off, #define MAX_PACKET_OFF 0xffff +static bool may_write_pkt_data(enum bpf_prog_type type) +{ + switch (type) { + case BPF_PROG_TYPE_XDP: + return true; + default: + return false; + } +} + static int check_packet_access(struct verifier_env *env, u32 regno, int off, int size) { @@ -713,6 +723,7 @@ static int check_ptr_alignment(struct verifier_env *env, struct reg_state *reg, switch (env->prog->type) { case BPF_PROG_TYPE_SCHED_CLS: case BPF_PROG_TYPE_SCHED_ACT: + case BPF_PROG_TYPE_XDP: break; default: verbose("verifier is misconfigured\n"); @@ -805,10 +816,15 @@ static int check_mem_access(struct verifier_env *env, u32 regno, int off, err = check_stack_read(state, off, size, value_regno); } } else if (state->regs[regno].type == PTR_TO_PACKET) { - if (t == BPF_WRITE) { + if (t == BPF_WRITE && !may_write_pkt_data(env->prog->type)) { verbose("cannot write into packet\n"); return -EACCES; } + if (t == BPF_WRITE && value_regno >= 0 && + is_pointer_value(env, value_regno)) { + verbose("R%d leaks addr into packet\n", value_regno); + return -EACCES; + } err = check_packet_access(env, 
regno, off, size); if (!err && t == BPF_READ && value_regno >= 0) mark_reg_unknown_value(state->regs, value_regno); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ebfbb7d..a12bbd32 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -309,7 +309,9 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, }; struct perf_raw_record raw = { .frag = { - .next = ctx_size ? &frag : NULL, + { + .next = ctx_size ? &frag : NULL, + }, .size = meta_size, .data = meta, }, diff --git a/net/Kconfig b/net/Kconfig index ff40562..c2cdbce 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -237,6 +237,7 @@ source "net/hsr/Kconfig" source "net/switchdev/Kconfig" source "net/l3mdev/Kconfig" source "net/qrtr/Kconfig" +source "net/ncsi/Kconfig" config RPS bool diff --git a/net/Makefile b/net/Makefile index bdd1455..9bd20bb 100644 --- a/net/Makefile +++ b/net/Makefile @@ -79,3 +79,4 @@ ifneq ($(CONFIG_NET_L3_MASTER_DEV),) obj-y += l3mdev/ endif obj-$(CONFIG_QRTR) += qrtr/ +obj-$(CONFIG_NET_NCSI) += ncsi/ diff --git a/net/core/dev.c b/net/core/dev.c index 7894e40..2a9c39f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -94,6 +94,7 @@ #include <linux/ethtool.h> #include <linux/notifier.h> #include <linux/skbuff.h> +#include <linux/bpf.h> #include <net/net_namespace.h> #include <net/sock.h> #include <net/busy_poll.h> @@ -6615,6 +6616,38 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) EXPORT_SYMBOL(dev_change_proto_down); /** + * dev_change_xdp_fd - set or clear a bpf program for a device rx path + * @dev: device + * @fd: new program fd or negative value to clear + * + * Set or clear a bpf program for a device + */ +int dev_change_xdp_fd(struct net_device *dev, int fd) +{ + const struct net_device_ops *ops = dev->netdev_ops; + struct bpf_prog *prog = NULL; + struct netdev_xdp xdp = {}; + int err; + + if (!ops->ndo_xdp) + return -EOPNOTSUPP; + if (fd >= 0) { + prog = bpf_prog_get_type(fd, 
BPF_PROG_TYPE_XDP); + if (IS_ERR(prog)) + return PTR_ERR(prog); + } + + xdp.command = XDP_SETUP_PROG; + xdp.prog = prog; + err = ops->ndo_xdp(dev, &xdp); + if (err < 0 && prog) + bpf_prog_put(prog); + + return err; +} +EXPORT_SYMBOL(dev_change_xdp_fd); + +/** * dev_new_index - allocate an ifindex * @net: the applicable net namespace * diff --git a/net/core/filter.c b/net/core/filter.c index 22e3992..6c627bc 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2410,6 +2410,12 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) } } +static const struct bpf_func_proto * +xdp_func_proto(enum bpf_func_id func_id) +{ + return sk_filter_func_proto(func_id); +} + static bool __is_valid_access(int off, int size, enum bpf_access_type type) { if (off < 0 || off >= sizeof(struct __sk_buff)) @@ -2477,6 +2483,44 @@ static bool tc_cls_act_is_valid_access(int off, int size, return __is_valid_access(off, size, type); } +static bool __is_valid_xdp_access(int off, int size, + enum bpf_access_type type) +{ + if (off < 0 || off >= sizeof(struct xdp_md)) + return false; + if (off % size != 0) + return false; + if (size != 4) + return false; + + return true; +} + +static bool xdp_is_valid_access(int off, int size, + enum bpf_access_type type, + enum bpf_reg_type *reg_type) +{ + if (type == BPF_WRITE) + return false; + + switch (off) { + case offsetof(struct xdp_md, data): + *reg_type = PTR_TO_PACKET; + break; + case offsetof(struct xdp_md, data_end): + *reg_type = PTR_TO_PACKET_END; + break; + } + + return __is_valid_xdp_access(off, size, type); +} + +void bpf_warn_invalid_xdp_action(u32 act) +{ + WARN_ONCE(1, "Illegal XDP return value %u, expect packet loss\n", act); +} +EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); + static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, struct bpf_insn *insn_buf, @@ -2628,6 +2672,29 @@ static u32 bpf_net_convert_ctx_access(enum bpf_access_type type, int dst_reg, return insn - insn_buf; } +static 
u32 xdp_convert_ctx_access(enum bpf_access_type type, int dst_reg, + int src_reg, int ctx_off, + struct bpf_insn *insn_buf, + struct bpf_prog *prog) +{ + struct bpf_insn *insn = insn_buf; + + switch (ctx_off) { + case offsetof(struct xdp_md, data): + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data)), + dst_reg, src_reg, + offsetof(struct xdp_buff, data)); + break; + case offsetof(struct xdp_md, data_end): + *insn++ = BPF_LDX_MEM(bytes_to_bpf_size(FIELD_SIZEOF(struct xdp_buff, data_end)), + dst_reg, src_reg, + offsetof(struct xdp_buff, data_end)); + break; + } + + return insn - insn_buf; +} + static const struct bpf_verifier_ops sk_filter_ops = { .get_func_proto = sk_filter_func_proto, .is_valid_access = sk_filter_is_valid_access, @@ -2640,6 +2707,12 @@ static const struct bpf_verifier_ops tc_cls_act_ops = { .convert_ctx_access = bpf_net_convert_ctx_access, }; +static const struct bpf_verifier_ops xdp_ops = { + .get_func_proto = xdp_func_proto, + .is_valid_access = xdp_is_valid_access, + .convert_ctx_access = xdp_convert_ctx_access, +}; + static struct bpf_prog_type_list sk_filter_type __read_mostly = { .ops = &sk_filter_ops, .type = BPF_PROG_TYPE_SOCKET_FILTER, @@ -2655,11 +2728,17 @@ static struct bpf_prog_type_list sched_act_type __read_mostly = { .type = BPF_PROG_TYPE_SCHED_ACT, }; +static struct bpf_prog_type_list xdp_type __read_mostly = { + .ops = &xdp_ops, + .type = BPF_PROG_TYPE_XDP, +}; + static int __init register_sk_filter_ops(void) { bpf_register_prog_type(&sk_filter_type); bpf_register_prog_type(&sched_cls_type); bpf_register_prog_type(&sched_act_type); + bpf_register_prog_type(&xdp_type); return 0; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index a9e3805..eba2b82 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -891,6 +891,16 @@ static size_t rtnl_port_size(const struct net_device *dev, return port_self_size; } +static size_t rtnl_xdp_size(const struct net_device *dev) +{ + size_t xdp_size = 
nla_total_size(1); /* XDP_ATTACHED */ + + if (!dev->netdev_ops->ndo_xdp) + return 0; + else + return xdp_size; +} + static noinline size_t if_nlmsg_size(const struct net_device *dev, u32 ext_filter_mask) { @@ -927,6 +937,7 @@ static noinline size_t if_nlmsg_size(const struct net_device *dev, + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_PORT_ID */ + nla_total_size(MAX_PHYS_ITEM_ID_LEN) /* IFLA_PHYS_SWITCH_ID */ + nla_total_size(IFNAMSIZ) /* IFLA_PHYS_PORT_NAME */ + + rtnl_xdp_size(dev) /* IFLA_XDP */ + nla_total_size(1); /* IFLA_PROTO_DOWN */ } @@ -1211,6 +1222,33 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) return 0; } +static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) +{ + struct netdev_xdp xdp_op = {}; + struct nlattr *xdp; + int err; + + if (!dev->netdev_ops->ndo_xdp) + return 0; + xdp = nla_nest_start(skb, IFLA_XDP); + if (!xdp) + return -EMSGSIZE; + xdp_op.command = XDP_QUERY_PROG; + err = dev->netdev_ops->ndo_xdp(dev, &xdp_op); + if (err) + goto err_cancel; + err = nla_put_u8(skb, IFLA_XDP_ATTACHED, xdp_op.prog_attached); + if (err) + goto err_cancel; + + nla_nest_end(skb, xdp); + return 0; + +err_cancel: + nla_nest_cancel(skb, xdp); + return err; +} + static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, int type, u32 pid, u32 seq, u32 change, unsigned int flags, u32 ext_filter_mask) @@ -1307,6 +1345,9 @@ static int rtnl_fill_ifinfo(struct sk_buff *skb, struct net_device *dev, if (rtnl_port_fill(skb, dev, ext_filter_mask)) goto nla_put_failure; + if (rtnl_xdp_fill(skb, dev)) + goto nla_put_failure; + if (dev->rtnl_link_ops || rtnl_have_link_slave_info(dev)) { if (rtnl_link_fill(skb, dev) < 0) goto nla_put_failure; @@ -1392,6 +1433,7 @@ static const struct nla_policy ifla_policy[IFLA_MAX+1] = { [IFLA_PHYS_SWITCH_ID] = { .type = NLA_BINARY, .len = MAX_PHYS_ITEM_ID_LEN }, [IFLA_LINK_NETNSID] = { .type = NLA_S32 }, [IFLA_PROTO_DOWN] = { .type = NLA_U8 }, + [IFLA_XDP] = { .type = 
NLA_NESTED }, }; static const struct nla_policy ifla_info_policy[IFLA_INFO_MAX+1] = { @@ -1429,6 +1471,11 @@ static const struct nla_policy ifla_port_policy[IFLA_PORT_MAX+1] = { [IFLA_PORT_RESPONSE] = { .type = NLA_U16, }, }; +static const struct nla_policy ifla_xdp_policy[IFLA_XDP_MAX + 1] = { + [IFLA_XDP_FD] = { .type = NLA_S32 }, + [IFLA_XDP_ATTACHED] = { .type = NLA_U8 }, +}; + static const struct rtnl_link_ops *linkinfo_to_kind_ops(const struct nlattr *nla) { const struct rtnl_link_ops *ops = NULL; @@ -2054,6 +2101,23 @@ static int do_setlink(const struct sk_buff *skb, status |= DO_SETLINK_NOTIFY; } + if (tb[IFLA_XDP]) { + struct nlattr *xdp[IFLA_XDP_MAX + 1]; + + err = nla_parse_nested(xdp, IFLA_XDP_MAX, tb[IFLA_XDP], + ifla_xdp_policy); + if (err < 0) + goto errout; + + if (xdp[IFLA_XDP_FD]) { + err = dev_change_xdp_fd(dev, + nla_get_s32(xdp[IFLA_XDP_FD])); + if (err) + goto errout; + status |= DO_SETLINK_NOTIFY; + } + } + errout: if (status & DO_SETLINK_MODIFIED) { if (status & DO_SETLINK_NOTIFY) diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 7236eb2..fc91967 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -333,6 +333,44 @@ static int dsa_slave_vlan_filtering(struct net_device *dev, return 0; } +static int dsa_fastest_ageing_time(struct dsa_switch *ds, + unsigned int ageing_time) +{ + int i; + + for (i = 0; i < DSA_MAX_PORTS; ++i) { + struct dsa_port *dp = &ds->ports[i]; + + if (dp && dp->ageing_time && dp->ageing_time < ageing_time) + ageing_time = dp->ageing_time; + } + + return ageing_time; +} + +static int dsa_slave_ageing_time(struct net_device *dev, + const struct switchdev_attr *attr, + struct switchdev_trans *trans) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch *ds = p->parent; + unsigned long ageing_jiffies = clock_t_to_jiffies(attr->u.ageing_time); + unsigned int ageing_time = jiffies_to_msecs(ageing_jiffies); + + /* bridge skips -EOPNOTSUPP, so skip the prepare phase */ + if (switchdev_trans_ph_prepare(trans)) 
+ return 0; + + /* Keep the fastest ageing time in case of multiple bridges */ + ds->ports[p->port].ageing_time = ageing_time; + ageing_time = dsa_fastest_ageing_time(ds, ageing_time); + + if (ds->drv->set_ageing_time) + return ds->drv->set_ageing_time(ds, ageing_time); + + return 0; +} + static int dsa_slave_port_attr_set(struct net_device *dev, const struct switchdev_attr *attr, struct switchdev_trans *trans) @@ -346,6 +384,9 @@ static int dsa_slave_port_attr_set(struct net_device *dev, case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING: ret = dsa_slave_vlan_filtering(dev, attr, trans); break; + case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME: + ret = dsa_slave_ageing_time(dev, attr, trans); + break; default: ret = -EOPNOTSUPP; break; diff --git a/net/ipv4/ip_forward.c b/net/ipv4/ip_forward.c index 9f0a7b9..8b4ffd2 100644 --- a/net/ipv4/ip_forward.c +++ b/net/ipv4/ip_forward.c @@ -117,7 +117,7 @@ int ip_forward(struct sk_buff *skb) if (opt->is_strictroute && rt->rt_uses_gateway) goto sr_failed; - IPCB(skb)->flags |= IPSKB_FORWARDED; + IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; mtu = ip_dst_mtu_maybe_forward(&rt->dst, true); if (ip_exceeds_mtu(skb, mtu)) { IP_INC_STATS(net, IPSTATS_MIB_FRAGFAILS); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index e23f141..dde37fb 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -223,8 +223,10 @@ static int ip_finish_output_gso(struct net *net, struct sock *sk, struct sk_buff *segs; int ret = 0; - /* common case: locally created skb or seglen is <= mtu */ - if (((IPCB(skb)->flags & IPSKB_FORWARDED) == 0) || + /* common case: fragmentation of segments is not allowed, + * or seglen is <= mtu + */ + if (((IPCB(skb)->flags & IPSKB_FRAG_SEGS) == 0) || skb_gso_validate_mtu(skb, mtu)) return ip_finish_output2(net, sk, skb); diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c index afd6b59..9d847c3 100644 --- a/net/ipv4/ip_tunnel_core.c +++ b/net/ipv4/ip_tunnel_core.c @@ -63,6 +63,7 @@ void 
iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, int pkt_len = skb->len - skb_inner_network_offset(skb); struct net *net = dev_net(rt->dst.dev); struct net_device *dev = skb->dev; + int skb_iif = skb->skb_iif; struct iphdr *iph; int err; @@ -72,6 +73,14 @@ void iptunnel_xmit(struct sock *sk, struct rtable *rt, struct sk_buff *skb, skb_dst_set(skb, &rt->dst); memset(IPCB(skb), 0, sizeof(*IPCB(skb))); + if (skb_iif && proto == IPPROTO_UDP) { + /* Arrived from an ingress interface and got udp encapsulated. + * The encapsulated network segment length may exceed dst mtu. + * Allow IP Fragmentation of segments. + */ + IPCB(skb)->flags |= IPSKB_FRAG_SEGS; + } + /* Push down and install the IP header. */ skb_push(skb, sizeof(struct iphdr)); skb_reset_network_header(skb); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index e0d76f5..eec2341 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1749,7 +1749,7 @@ static void ipmr_queue_xmit(struct net *net, struct mr_table *mrt, vif->dev->stats.tx_bytes += skb->len; } - IPCB(skb)->flags |= IPSKB_FORWARDED; + IPCB(skb)->flags |= IPSKB_FORWARDED | IPSKB_FRAG_SEGS; /* RFC1584 teaches, that DVMRP/PIM router must deliver packets locally * not only before forwarding, but after forwarding on all output diff --git a/net/ncsi/Kconfig b/net/ncsi/Kconfig new file mode 100644 index 0000000..08a8a60 --- /dev/null +++ b/net/ncsi/Kconfig @@ -0,0 +1,12 @@ +# +# Configuration for NCSI support +# + +config NET_NCSI + bool "NCSI interface support" + depends on INET + ---help--- + This module provides NCSI (Network Controller Sideband Interface) + support. Enable this only if your system connects to a network + device via NCSI and the ethernet driver you're using supports + the protocol explicitly. 
diff --git a/net/ncsi/Makefile b/net/ncsi/Makefile new file mode 100644 index 0000000..dd12b56 --- /dev/null +++ b/net/ncsi/Makefile @@ -0,0 +1,4 @@ +# +# Makefile for NCSI API +# +obj-$(CONFIG_NET_NCSI) += ncsi-cmd.o ncsi-rsp.o ncsi-aen.o ncsi-manage.o diff --git a/net/ncsi/internal.h b/net/ncsi/internal.h new file mode 100644 index 0000000..33738c0 --- /dev/null +++ b/net/ncsi/internal.h @@ -0,0 +1,328 @@ +/* + * Copyright Gavin Shan, IBM Corporation 2016. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __NCSI_INTERNAL_H__ +#define __NCSI_INTERNAL_H__ + +enum { + NCSI_CAP_BASE = 0, + NCSI_CAP_GENERIC = 0, + NCSI_CAP_BC, + NCSI_CAP_MC, + NCSI_CAP_BUFFER, + NCSI_CAP_AEN, + NCSI_CAP_VLAN, + NCSI_CAP_MAX +}; + +enum { + NCSI_CAP_GENERIC_HWA = 0x01, /* HW arbitration */ + NCSI_CAP_GENERIC_HDS = 0x02, /* HNC driver status change */ + NCSI_CAP_GENERIC_FC = 0x04, /* HNC to MC flow control */ + NCSI_CAP_GENERIC_FC1 = 0x08, /* MC to HNC flow control */ + NCSI_CAP_GENERIC_MC = 0x10, /* Global MC filtering */ + NCSI_CAP_GENERIC_HWA_UNKNOWN = 0x00, /* Unknown HW arbitration */ + NCSI_CAP_GENERIC_HWA_SUPPORT = 0x20, /* Supported HW arbitration */ + NCSI_CAP_GENERIC_HWA_NOT_SUPPORT = 0x40, /* No HW arbitration */ + NCSI_CAP_GENERIC_HWA_RESERVED = 0x60, /* Reserved HW arbitration */ + NCSI_CAP_GENERIC_HWA_MASK = 0x60, /* Mask for HW arbitration */ + NCSI_CAP_GENERIC_MASK = 0x7f, + NCSI_CAP_BC_ARP = 0x01, /* ARP packet filtering */ + NCSI_CAP_BC_DHCPC = 0x02, /* DHCP client filtering */ + NCSI_CAP_BC_DHCPS = 0x04, /* DHCP server filtering */ + NCSI_CAP_BC_NETBIOS = 0x08, /* NetBIOS packet filtering */ + NCSI_CAP_BC_MASK = 0x0f, + NCSI_CAP_MC_IPV6_NEIGHBOR = 0x01, /* IPv6 neighbor filtering */ + NCSI_CAP_MC_IPV6_ROUTER = 0x02, /* IPv6 router filtering */ + 
NCSI_CAP_MC_DHCPV6_RELAY = 0x04, /* DHCPv6 relay / server MC */ + NCSI_CAP_MC_DHCPV6_WELL_KNOWN = 0x08, /* DHCPv6 well-known MC */ + NCSI_CAP_MC_IPV6_MLD = 0x10, /* IPv6 MLD filtering */ + NCSI_CAP_MC_IPV6_NEIGHBOR_S = 0x20, /* IPv6 neighbour filtering */ + NCSI_CAP_MC_MASK = 0x3f, + NCSI_CAP_AEN_LSC = 0x01, /* Link status change */ + NCSI_CAP_AEN_CR = 0x02, /* Configuration required */ + NCSI_CAP_AEN_HDS = 0x04, /* HNC driver status */ + NCSI_CAP_AEN_MASK = 0x07, + NCSI_CAP_VLAN_ONLY = 0x01, /* Filter VLAN packet only */ + NCSI_CAP_VLAN_NO = 0x02, /* Filter VLAN and non-VLAN */ + NCSI_CAP_VLAN_ANY = 0x04, /* Filter Any-and-non-VLAN */ + NCSI_CAP_VLAN_MASK = 0x07 +}; + +enum { + NCSI_MODE_BASE = 0, + NCSI_MODE_ENABLE = 0, + NCSI_MODE_TX_ENABLE, + NCSI_MODE_LINK, + NCSI_MODE_VLAN, + NCSI_MODE_BC, + NCSI_MODE_MC, + NCSI_MODE_AEN, + NCSI_MODE_FC, + NCSI_MODE_MAX +}; + +enum { + NCSI_FILTER_BASE = 0, + NCSI_FILTER_VLAN = 0, + NCSI_FILTER_UC, + NCSI_FILTER_MC, + NCSI_FILTER_MIXED, + NCSI_FILTER_MAX +}; + +struct ncsi_channel_version { + u32 version; /* Supported BCD encoded NCSI version */ + u32 alpha2; /* Supported BCD encoded NCSI version */ + u8 fw_name[12]; /* Firmware name string */ + u32 fw_version; /* Firmware version */ + u16 pci_ids[4]; /* PCI identification */ + u32 mf_id; /* Manufacturer ID */ +}; + +struct ncsi_channel_cap { + u32 index; /* Index of channel capabilities */ + u32 cap; /* NCSI channel capability */ +}; + +struct ncsi_channel_mode { + u32 index; /* Index of channel modes */ + u32 enable; /* Enabled or disabled */ + u32 size; /* Valid entries in ncm_data[] */ + u32 data[8]; /* Data entries */ +}; + +struct ncsi_channel_filter { + u32 index; /* Index of channel filters */ + u32 total; /* Total entries in the filter table */ + u64 bitmap; /* Bitmap of valid entries */ + u32 data[]; /* Data for the valid entries */ +}; + +struct ncsi_channel_stats { + u32 hnc_cnt_hi; /* Counter cleared */ + u32 hnc_cnt_lo; /* Counter cleared */ + u32 hnc_rx_bytes; /* 
Rx bytes */ + u32 hnc_tx_bytes; /* Tx bytes */ + u32 hnc_rx_uc_pkts; /* Rx UC packets */ + u32 hnc_rx_mc_pkts; /* Rx MC packets */ + u32 hnc_rx_bc_pkts; /* Rx BC packets */ + u32 hnc_tx_uc_pkts; /* Tx UC packets */ + u32 hnc_tx_mc_pkts; /* Tx MC packets */ + u32 hnc_tx_bc_pkts; /* Tx BC packets */ + u32 hnc_fcs_err; /* FCS errors */ + u32 hnc_align_err; /* Alignment errors */ + u32 hnc_false_carrier; /* False carrier detection */ + u32 hnc_runt_pkts; /* Rx runt packets */ + u32 hnc_jabber_pkts; /* Rx jabber packets */ + u32 hnc_rx_pause_xon; /* Rx pause XON frames */ + u32 hnc_rx_pause_xoff; /* Rx XOFF frames */ + u32 hnc_tx_pause_xon; /* Tx XON frames */ + u32 hnc_tx_pause_xoff; /* Tx XOFF frames */ + u32 hnc_tx_s_collision; /* Single collision frames */ + u32 hnc_tx_m_collision; /* Multiple collision frames */ + u32 hnc_l_collision; /* Late collision frames */ + u32 hnc_e_collision; /* Excessive collision frames */ + u32 hnc_rx_ctl_frames; /* Rx control frames */ + u32 hnc_rx_64_frames; /* Rx 64-bytes frames */ + u32 hnc_rx_127_frames; /* Rx 65-127 bytes frames */ + u32 hnc_rx_255_frames; /* Rx 128-255 bytes frames */ + u32 hnc_rx_511_frames; /* Rx 256-511 bytes frames */ + u32 hnc_rx_1023_frames; /* Rx 512-1023 bytes frames */ + u32 hnc_rx_1522_frames; /* Rx 1024-1522 bytes frames */ + u32 hnc_rx_9022_frames; /* Rx 1523-9022 bytes frames */ + u32 hnc_tx_64_frames; /* Tx 64-bytes frames */ + u32 hnc_tx_127_frames; /* Tx 65-127 bytes frames */ + u32 hnc_tx_255_frames; /* Tx 128-255 bytes frames */ + u32 hnc_tx_511_frames; /* Tx 256-511 bytes frames */ + u32 hnc_tx_1023_frames; /* Tx 512-1023 bytes frames */ + u32 hnc_tx_1522_frames; /* Tx 1024-1522 bytes frames */ + u32 hnc_tx_9022_frames; /* Tx 1523-9022 bytes frames */ + u32 hnc_rx_valid_bytes; /* Rx valid bytes */ + u32 hnc_rx_runt_pkts; /* Rx error runt packets */ + u32 hnc_rx_jabber_pkts; /* Rx error jabber packets */ + u32 ncsi_rx_cmds; /* Rx NCSI commands */ + u32 ncsi_dropped_cmds; /* Dropped commands */ + 
u32 ncsi_cmd_type_errs; /* Command type errors */ + u32 ncsi_cmd_csum_errs; /* Command checksum errors */ + u32 ncsi_rx_pkts; /* Rx NCSI packets */ + u32 ncsi_tx_pkts; /* Tx NCSI packets */ + u32 ncsi_tx_aen_pkts; /* Tx AEN packets */ + u32 pt_tx_pkts; /* Tx packets */ + u32 pt_tx_dropped; /* Tx dropped packets */ + u32 pt_tx_channel_err; /* Tx channel errors */ + u32 pt_tx_us_err; /* Tx undersize errors */ + u32 pt_rx_pkts; /* Rx packets */ + u32 pt_rx_dropped; /* Rx dropped packets */ + u32 pt_rx_channel_err; /* Rx channel errors */ + u32 pt_rx_us_err; /* Rx undersize errors */ + u32 pt_rx_os_err; /* Rx oversize errors */ +}; + +struct ncsi_dev_priv; +struct ncsi_package; + +#define NCSI_PACKAGE_SHIFT 5 +#define NCSI_PACKAGE_INDEX(c) (((c) >> NCSI_PACKAGE_SHIFT) & 0x7) +#define NCSI_CHANNEL_INDEX(c) ((c) & ((1 << NCSI_PACKAGE_SHIFT) - 1)) +#define NCSI_TO_CHANNEL(p, c) (((p) << NCSI_PACKAGE_SHIFT) | (c)) + +struct ncsi_channel { + unsigned char id; + int state; +#define NCSI_CHANNEL_INACTIVE 1 +#define NCSI_CHANNEL_ACTIVE 2 +#define NCSI_CHANNEL_INVISIBLE 3 + spinlock_t lock; /* Protect filters etc */ + struct ncsi_package *package; + struct ncsi_channel_version version; + struct ncsi_channel_cap caps[NCSI_CAP_MAX]; + struct ncsi_channel_mode modes[NCSI_MODE_MAX]; + struct ncsi_channel_filter *filters[NCSI_FILTER_MAX]; + struct ncsi_channel_stats stats; + struct timer_list timer; /* Link monitor timer */ + bool enabled; /* Timer is enabled */ + unsigned int timeout; /* Times of timeout */ + struct list_head node; + struct list_head link; +}; + +struct ncsi_package { + unsigned char id; /* NCSI 3-bits package ID */ + unsigned char uuid[16]; /* UUID */ + struct ncsi_dev_priv *ndp; /* NCSI device */ + spinlock_t lock; /* Protect the package */ + unsigned int channel_num; /* Number of channels */ + struct list_head channels; /* List of channels */ + struct list_head node; /* Form list of packages */ +}; + +struct ncsi_request { + unsigned char id; /* Request ID - 0 to 
255 */ + bool used; /* Request that has been assigned */ + bool driven; /* Drive state machine */ + struct ncsi_dev_priv *ndp; /* Associated NCSI device */ + struct sk_buff *cmd; /* Associated NCSI command packet */ + struct sk_buff *rsp; /* Associated NCSI response packet */ + struct timer_list timer; /* Timer on waiting for response */ + bool enabled; /* Time has been enabled or not */ +}; + +enum { + ncsi_dev_state_major = 0xff00, + ncsi_dev_state_minor = 0x00ff, + ncsi_dev_state_probe_deselect = 0x0201, + ncsi_dev_state_probe_package, + ncsi_dev_state_probe_channel, + ncsi_dev_state_probe_cis, + ncsi_dev_state_probe_gvi, + ncsi_dev_state_probe_gc, + ncsi_dev_state_probe_gls, + ncsi_dev_state_probe_dp, + ncsi_dev_state_config_sp = 0x0301, + ncsi_dev_state_config_cis, + ncsi_dev_state_config_sma, + ncsi_dev_state_config_ebf, +#if IS_ENABLED(CONFIG_IPV6) + ncsi_dev_state_config_egmf, +#endif + ncsi_dev_state_config_ecnt, + ncsi_dev_state_config_ec, + ncsi_dev_state_config_ae, + ncsi_dev_state_config_gls, + ncsi_dev_state_config_done, + ncsi_dev_state_suspend_select = 0x0401, + ncsi_dev_state_suspend_dcnt, + ncsi_dev_state_suspend_dc, + ncsi_dev_state_suspend_deselect, + ncsi_dev_state_suspend_done +}; + +struct ncsi_dev_priv { + struct ncsi_dev ndev; /* Associated NCSI device */ + unsigned int flags; /* NCSI device flags */ +#define NCSI_DEV_PROBED 1 /* Finalized NCSI topology */ +#define NCSI_DEV_HWA 2 /* Enabled HW arbitration */ +#define NCSI_DEV_RESHUFFLE 4 + spinlock_t lock; /* Protect the NCSI device */ +#if IS_ENABLED(CONFIG_IPV6) + unsigned int inet6_addr_num; /* Number of IPv6 addresses */ +#endif + unsigned int package_num; /* Number of packages */ + struct list_head packages; /* List of packages */ + struct ncsi_request requests[256]; /* Request table */ + unsigned int request_id; /* Last used request ID */ + unsigned int pending_req_num; /* Number of pending requests */ + struct ncsi_package *active_package; /* Currently handled package */ + struct 
ncsi_channel *active_channel; /* Currently handled channel */ + struct list_head channel_queue; /* Config queue of channels */ + struct work_struct work; /* For channel management */ + struct packet_type ptype; /* NCSI packet Rx handler */ + struct list_head node; /* Form NCSI device list */ +}; + +struct ncsi_cmd_arg { + struct ncsi_dev_priv *ndp; /* Associated NCSI device */ + unsigned char type; /* Command in the NCSI packet */ + unsigned char id; /* Request ID (sequence number) */ + unsigned char package; /* Destination package ID */ + unsigned char channel; /* Destination channel ID or 0x1f */ + unsigned short payload; /* Command packet payload length */ + bool driven; /* Drive the state machine? */ + union { + unsigned char bytes[16]; /* Command packet specific data */ + unsigned short words[8]; + unsigned int dwords[4]; + }; +}; + +extern struct list_head ncsi_dev_list; +extern spinlock_t ncsi_dev_lock; + +#define TO_NCSI_DEV_PRIV(nd) \ + container_of(nd, struct ncsi_dev_priv, ndev) +#define NCSI_FOR_EACH_DEV(ndp) \ + list_for_each_entry_rcu(ndp, &ncsi_dev_list, node) +#define NCSI_FOR_EACH_PACKAGE(ndp, np) \ + list_for_each_entry_rcu(np, &ndp->packages, node) +#define NCSI_FOR_EACH_CHANNEL(np, nc) \ + list_for_each_entry_rcu(nc, &np->channels, node) + +/* Resources */ +int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data); +int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data); +int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index); +void ncsi_start_channel_monitor(struct ncsi_channel *nc); +void ncsi_stop_channel_monitor(struct ncsi_channel *nc); +struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np, + unsigned char id); +struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, + unsigned char id); +struct ncsi_package *ncsi_find_package(struct ncsi_dev_priv *ndp, + unsigned char id); +struct ncsi_package *ncsi_add_package(struct ncsi_dev_priv *ndp, + unsigned char id); +void 
ncsi_remove_package(struct ncsi_package *np); +void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp, + unsigned char id, + struct ncsi_package **np, + struct ncsi_channel **nc); +struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven); +void ncsi_free_request(struct ncsi_request *nr); +struct ncsi_dev *ncsi_find_dev(struct net_device *dev); +int ncsi_process_next_channel(struct ncsi_dev_priv *ndp); + +/* Packet handlers */ +u32 ncsi_calculate_checksum(unsigned char *data, int len); +int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca); +int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); +int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb); + +#endif /* __NCSI_INTERNAL_H__ */ diff --git a/net/ncsi/ncsi-aen.c b/net/ncsi/ncsi-aen.c new file mode 100644 index 0000000..d463468 --- /dev/null +++ b/net/ncsi/ncsi-aen.c @@ -0,0 +1,193 @@ +/* + * Copyright Gavin Shan, IBM Corporation 2016. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> + +#include <net/ncsi.h> +#include <net/net_namespace.h> +#include <net/sock.h> + +#include "internal.h" +#include "ncsi-pkt.h" + +static int ncsi_validate_aen_pkt(struct ncsi_aen_pkt_hdr *h, + const unsigned short payload) +{ + u32 checksum; + __be32 *pchecksum; + + if (h->common.revision != NCSI_PKT_REVISION) + return -EINVAL; + if (ntohs(h->common.length) != payload) + return -EINVAL; + + /* Validate checksum, which might be zeroes if the + * sender doesn't support checksum according to NCSI + * specification. 
+ */ + pchecksum = (__be32 *)((void *)(h + 1) + payload - 4); + if (ntohl(*pchecksum) == 0) + return 0; + + checksum = ncsi_calculate_checksum((unsigned char *)h, + sizeof(*h) + payload - 4); + if (*pchecksum != htonl(checksum)) + return -EINVAL; + + return 0; +} + +static int ncsi_aen_handler_lsc(struct ncsi_dev_priv *ndp, + struct ncsi_aen_pkt_hdr *h) +{ + struct ncsi_aen_lsc_pkt *lsc; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + unsigned long old_data; + unsigned long flags; + + /* Find the NCSI channel */ + ncsi_find_package_and_channel(ndp, h->common.channel, NULL, &nc); + if (!nc) + return -ENODEV; + + /* Update the link status */ + ncm = &nc->modes[NCSI_MODE_LINK]; + lsc = (struct ncsi_aen_lsc_pkt *)h; + old_data = ncm->data[2]; + ncm->data[2] = ntohl(lsc->status); + ncm->data[4] = ntohl(lsc->oem_status); + if (!((old_data ^ ncm->data[2]) & 0x1) || + !list_empty(&nc->link)) + return 0; + if (!(nc->state == NCSI_CHANNEL_INACTIVE && (ncm->data[2] & 0x1)) && + !(nc->state == NCSI_CHANNEL_ACTIVE && !(ncm->data[2] & 0x1))) + return 0; + + if (!(ndp->flags & NCSI_DEV_HWA) && + nc->state == NCSI_CHANNEL_ACTIVE) + ndp->flags |= NCSI_DEV_RESHUFFLE; + + ncsi_stop_channel_monitor(nc); + spin_lock_irqsave(&ndp->lock, flags); + list_add_tail_rcu(&nc->link, &ndp->channel_queue); + spin_unlock_irqrestore(&ndp->lock, flags); + + return ncsi_process_next_channel(ndp); +} + +static int ncsi_aen_handler_cr(struct ncsi_dev_priv *ndp, + struct ncsi_aen_pkt_hdr *h) +{ + struct ncsi_channel *nc; + unsigned long flags; + + /* Find the NCSI channel */ + ncsi_find_package_and_channel(ndp, h->common.channel, NULL, &nc); + if (!nc) + return -ENODEV; + + if (!list_empty(&nc->link) || + nc->state != NCSI_CHANNEL_ACTIVE) + return 0; + + ncsi_stop_channel_monitor(nc); + spin_lock_irqsave(&ndp->lock, flags); + xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + list_add_tail_rcu(&nc->link, &ndp->channel_queue); + spin_unlock_irqrestore(&ndp->lock, flags); + + return 
ncsi_process_next_channel(ndp); +} + +static int ncsi_aen_handler_hncdsc(struct ncsi_dev_priv *ndp, + struct ncsi_aen_pkt_hdr *h) +{ + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + struct ncsi_aen_hncdsc_pkt *hncdsc; + unsigned long flags; + + /* Find the NCSI channel */ + ncsi_find_package_and_channel(ndp, h->common.channel, NULL, &nc); + if (!nc) + return -ENODEV; + + /* If the channel is active one, we need reconfigure it */ + ncm = &nc->modes[NCSI_MODE_LINK]; + hncdsc = (struct ncsi_aen_hncdsc_pkt *)h; + ncm->data[3] = ntohl(hncdsc->status); + if (!list_empty(&nc->link) || + nc->state != NCSI_CHANNEL_ACTIVE || + (ncm->data[3] & 0x1)) + return 0; + + if (ndp->flags & NCSI_DEV_HWA) + ndp->flags |= NCSI_DEV_RESHUFFLE; + + /* If this channel is the active one and the link doesn't + * work, we have to choose another channel to be active one. + * The logic here is exactly similar to what we do when link + * is down on the active channel. + */ + ncsi_stop_channel_monitor(nc); + spin_lock_irqsave(&ndp->lock, flags); + list_add_tail_rcu(&nc->link, &ndp->channel_queue); + spin_unlock_irqrestore(&ndp->lock, flags); + + ncsi_process_next_channel(ndp); + + return 0; +} + +static struct ncsi_aen_handler { + unsigned char type; + int payload; + int (*handler)(struct ncsi_dev_priv *ndp, + struct ncsi_aen_pkt_hdr *h); +} ncsi_aen_handlers[] = { + { NCSI_PKT_AEN_LSC, 12, ncsi_aen_handler_lsc }, + { NCSI_PKT_AEN_CR, 4, ncsi_aen_handler_cr }, + { NCSI_PKT_AEN_HNCDSC, 4, ncsi_aen_handler_hncdsc } +}; + +int ncsi_aen_handler(struct ncsi_dev_priv *ndp, struct sk_buff *skb) +{ + struct ncsi_aen_pkt_hdr *h; + struct ncsi_aen_handler *nah = NULL; + int i, ret; + + /* Find the handler */ + h = (struct ncsi_aen_pkt_hdr *)skb_network_header(skb); + for (i = 0; i < ARRAY_SIZE(ncsi_aen_handlers); i++) { + if (ncsi_aen_handlers[i].type == h->type) { + nah = &ncsi_aen_handlers[i]; + break; + } + } + + if (!nah) { + netdev_warn(ndp->ndev.dev, "Invalid AEN (0x%x) received\n", + 
h->type); + return -ENOENT; + } + + ret = ncsi_validate_aen_pkt(h, nah->payload); + if (ret) + goto out; + + ret = nah->handler(ndp, h); +out: + consume_skb(skb); + return ret; +} diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c new file mode 100644 index 0000000..21057a8 --- /dev/null +++ b/net/ncsi/ncsi-cmd.c @@ -0,0 +1,367 @@ +/* + * Copyright Gavin Shan, IBM Corporation 2016. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/etherdevice.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> + +#include <net/ncsi.h> +#include <net/net_namespace.h> +#include <net/sock.h> + +#include "internal.h" +#include "ncsi-pkt.h" + +u32 ncsi_calculate_checksum(unsigned char *data, int len) +{ + u32 checksum = 0; + int i; + + for (i = 0; i < len; i += 2) + checksum += (((u32)data[i] << 8) | data[i + 1]); + + checksum = (~checksum + 1); + return checksum; +} + +/* This function should be called after the data area has been + * populated completely. 
+ */ +static void ncsi_cmd_build_header(struct ncsi_pkt_hdr *h, + struct ncsi_cmd_arg *nca) +{ + u32 checksum; + __be32 *pchecksum; + + h->mc_id = 0; + h->revision = NCSI_PKT_REVISION; + h->reserved = 0; + h->id = nca->id; + h->type = nca->type; + h->channel = NCSI_TO_CHANNEL(nca->package, + nca->channel); + h->length = htons(nca->payload); + h->reserved1[0] = 0; + h->reserved1[1] = 0; + + /* Fill with calculated checksum */ + checksum = ncsi_calculate_checksum((unsigned char *)h, + sizeof(*h) + nca->payload); + pchecksum = (__be32 *)((void *)h + sizeof(struct ncsi_pkt_hdr) + + nca->payload); + *pchecksum = htonl(checksum); +} + +static int ncsi_cmd_handler_default(struct sk_buff *skb, + struct ncsi_cmd_arg *nca) +{ + struct ncsi_cmd_pkt *cmd; + + cmd = (struct ncsi_cmd_pkt *)skb_put(skb, sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + ncsi_cmd_build_header(&cmd->cmd.common, nca); + + return 0; +} + +static int ncsi_cmd_handler_sp(struct sk_buff *skb, + struct ncsi_cmd_arg *nca) +{ + struct ncsi_cmd_sp_pkt *cmd; + + cmd = (struct ncsi_cmd_sp_pkt *)skb_put(skb, sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->hw_arbitration = nca->bytes[0]; + ncsi_cmd_build_header(&cmd->cmd.common, nca); + + return 0; +} + +static int ncsi_cmd_handler_dc(struct sk_buff *skb, + struct ncsi_cmd_arg *nca) +{ + struct ncsi_cmd_dc_pkt *cmd; + + cmd = (struct ncsi_cmd_dc_pkt *)skb_put(skb, sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->ald = nca->bytes[0]; + ncsi_cmd_build_header(&cmd->cmd.common, nca); + + return 0; +} + +static int ncsi_cmd_handler_rc(struct sk_buff *skb, + struct ncsi_cmd_arg *nca) +{ + struct ncsi_cmd_rc_pkt *cmd; + + cmd = (struct ncsi_cmd_rc_pkt *)skb_put(skb, sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + ncsi_cmd_build_header(&cmd->cmd.common, nca); + + return 0; +} + +static int ncsi_cmd_handler_ae(struct sk_buff *skb, + struct ncsi_cmd_arg *nca) +{ + struct ncsi_cmd_ae_pkt *cmd; + + cmd = (struct ncsi_cmd_ae_pkt *)skb_put(skb, sizeof(*cmd)); + 
memset(cmd, 0, sizeof(*cmd)); + cmd->mc_id = nca->bytes[0]; + cmd->mode = htonl(nca->dwords[1]); + ncsi_cmd_build_header(&cmd->cmd.common, nca); + + return 0; +} + +static int ncsi_cmd_handler_sl(struct sk_buff *skb, + struct ncsi_cmd_arg *nca) +{ + struct ncsi_cmd_sl_pkt *cmd; + + cmd = (struct ncsi_cmd_sl_pkt *)skb_put(skb, sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->mode = htonl(nca->dwords[0]); + cmd->oem_mode = htonl(nca->dwords[1]); + ncsi_cmd_build_header(&cmd->cmd.common, nca); + + return 0; +} + +static int ncsi_cmd_handler_svf(struct sk_buff *skb, + struct ncsi_cmd_arg *nca) +{ + struct ncsi_cmd_svf_pkt *cmd; + + cmd = (struct ncsi_cmd_svf_pkt *)skb_put(skb, sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->vlan = htons(nca->words[0]); + cmd->index = nca->bytes[2]; + cmd->enable = nca->bytes[3]; + ncsi_cmd_build_header(&cmd->cmd.common, nca); + + return 0; +} + +static int ncsi_cmd_handler_ev(struct sk_buff *skb, + struct ncsi_cmd_arg *nca) +{ + struct ncsi_cmd_ev_pkt *cmd; + + cmd = (struct ncsi_cmd_ev_pkt *)skb_put(skb, sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->mode = nca->bytes[0]; + ncsi_cmd_build_header(&cmd->cmd.common, nca); + + return 0; +} + +static int ncsi_cmd_handler_sma(struct sk_buff *skb, + struct ncsi_cmd_arg *nca) +{ + struct ncsi_cmd_sma_pkt *cmd; + int i; + + cmd = (struct ncsi_cmd_sma_pkt *)skb_put(skb, sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + for (i = 0; i < 6; i++) + cmd->mac[i] = nca->bytes[i]; + cmd->index = nca->bytes[6]; + cmd->at_e = nca->bytes[7]; + ncsi_cmd_build_header(&cmd->cmd.common, nca); + + return 0; +} + +static int ncsi_cmd_handler_ebf(struct sk_buff *skb, + struct ncsi_cmd_arg *nca) +{ + struct ncsi_cmd_ebf_pkt *cmd; + + cmd = (struct ncsi_cmd_ebf_pkt *)skb_put(skb, sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->mode = htonl(nca->dwords[0]); + ncsi_cmd_build_header(&cmd->cmd.common, nca); + + return 0; +} + +static int ncsi_cmd_handler_egmf(struct sk_buff *skb, + struct 
ncsi_cmd_arg *nca) +{ + struct ncsi_cmd_egmf_pkt *cmd; + + cmd = (struct ncsi_cmd_egmf_pkt *)skb_put(skb, sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->mode = htonl(nca->dwords[0]); + ncsi_cmd_build_header(&cmd->cmd.common, nca); + + return 0; +} + +static int ncsi_cmd_handler_snfc(struct sk_buff *skb, + struct ncsi_cmd_arg *nca) +{ + struct ncsi_cmd_snfc_pkt *cmd; + + cmd = (struct ncsi_cmd_snfc_pkt *)skb_put(skb, sizeof(*cmd)); + memset(cmd, 0, sizeof(*cmd)); + cmd->mode = nca->bytes[0]; + ncsi_cmd_build_header(&cmd->cmd.common, nca); + + return 0; +} + +static struct ncsi_cmd_handler { + unsigned char type; + int payload; + int (*handler)(struct sk_buff *skb, + struct ncsi_cmd_arg *nca); +} ncsi_cmd_handlers[] = { + { NCSI_PKT_CMD_CIS, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_SP, 4, ncsi_cmd_handler_sp }, + { NCSI_PKT_CMD_DP, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_EC, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_DC, 4, ncsi_cmd_handler_dc }, + { NCSI_PKT_CMD_RC, 4, ncsi_cmd_handler_rc }, + { NCSI_PKT_CMD_ECNT, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_DCNT, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_AE, 8, ncsi_cmd_handler_ae }, + { NCSI_PKT_CMD_SL, 8, ncsi_cmd_handler_sl }, + { NCSI_PKT_CMD_GLS, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_SVF, 4, ncsi_cmd_handler_svf }, + { NCSI_PKT_CMD_EV, 4, ncsi_cmd_handler_ev }, + { NCSI_PKT_CMD_DV, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_SMA, 8, ncsi_cmd_handler_sma }, + { NCSI_PKT_CMD_EBF, 4, ncsi_cmd_handler_ebf }, + { NCSI_PKT_CMD_DBF, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_EGMF, 4, ncsi_cmd_handler_egmf }, + { NCSI_PKT_CMD_DGMF, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_SNFC, 4, ncsi_cmd_handler_snfc }, + { NCSI_PKT_CMD_GVI, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_GC, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_GP, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_GCPS, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_GNS, 0, 
ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_GNPTS, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_GPS, 0, ncsi_cmd_handler_default }, + { NCSI_PKT_CMD_OEM, 0, NULL }, + { NCSI_PKT_CMD_PLDM, 0, NULL }, + { NCSI_PKT_CMD_GPUUID, 0, ncsi_cmd_handler_default } +}; + +static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) +{ + struct ncsi_dev_priv *ndp = nca->ndp; + struct ncsi_dev *nd = &ndp->ndev; + struct net_device *dev = nd->dev; + int hlen = LL_RESERVED_SPACE(dev); + int tlen = dev->needed_tailroom; + int len = hlen + tlen; + struct sk_buff *skb; + struct ncsi_request *nr; + + nr = ncsi_alloc_request(ndp, nca->driven); + if (!nr) + return NULL; + + /* NCSI command packet has 16-bytes header, payload, 4 bytes checksum. + * The packet needs padding if its payload is less than 26 bytes to + * meet 64 bytes minimal ethernet frame length. + */ + len += sizeof(struct ncsi_cmd_pkt_hdr) + 4; + if (nca->payload < 26) + len += 26; + else + len += nca->payload; + + /* Allocate skb */ + skb = alloc_skb(len, GFP_ATOMIC); + if (!skb) { + ncsi_free_request(nr); + return NULL; + } + + nr->cmd = skb; + skb_reserve(skb, hlen); + skb_reset_network_header(skb); + + skb->dev = dev; + skb->protocol = htons(ETH_P_NCSI); + + return nr; +} + +int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca) +{ + struct ncsi_request *nr; + struct ethhdr *eh; + struct ncsi_cmd_handler *nch = NULL; + int i, ret; + + /* Search for the handler */ + for (i = 0; i < ARRAY_SIZE(ncsi_cmd_handlers); i++) { + if (ncsi_cmd_handlers[i].type == nca->type) { + if (ncsi_cmd_handlers[i].handler) + nch = &ncsi_cmd_handlers[i]; + else + nch = NULL; + + break; + } + } + + if (!nch) { + netdev_err(nca->ndp->ndev.dev, + "Cannot send packet with type 0x%02x\n", nca->type); + return -ENOENT; + } + + /* Get packet payload length and allocate the request */ + nca->payload = nch->payload; + nr = ncsi_alloc_command(nca); + if (!nr) + return -ENOMEM; + + /* Prepare the packet */ + nca->id = nr->id; + ret = 
nch->handler(nr->cmd, nca); + if (ret) { + ncsi_free_request(nr); + return ret; + } + + /* Fill the ethernet header */ + eh = (struct ethhdr *)skb_push(nr->cmd, sizeof(*eh)); + eh->h_proto = htons(ETH_P_NCSI); + eth_broadcast_addr(eh->h_dest); + eth_broadcast_addr(eh->h_source); + + /* Start the timer for the request that might not have + * corresponding response. Given NCSI is an internal + * connection a 1 second delay should be sufficient. + */ + nr->enabled = true; + mod_timer(&nr->timer, jiffies + 1 * HZ); + + /* Send NCSI packet */ + skb_get(nr->cmd); + ret = dev_queue_xmit(nr->cmd); + if (ret < 0) { + ncsi_free_request(nr); + return ret; + } + + return 0; +} diff --git a/net/ncsi/ncsi-manage.c b/net/ncsi/ncsi-manage.c new file mode 100644 index 0000000..d627a39 --- /dev/null +++ b/net/ncsi/ncsi-manage.c @@ -0,0 +1,1199 @@ +/* + * Copyright Gavin Shan, IBM Corporation 2016. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> +#include <linux/netlink.h> + +#include <net/ncsi.h> +#include <net/net_namespace.h> +#include <net/sock.h> +#include <net/addrconf.h> +#include <net/ipv6.h> +#include <net/if_inet6.h> + +#include "internal.h" +#include "ncsi-pkt.h" + +LIST_HEAD(ncsi_dev_list); +DEFINE_SPINLOCK(ncsi_dev_lock); + +static inline int ncsi_filter_size(int table) +{ + int sizes[] = { 2, 6, 6, 6 }; + + BUILD_BUG_ON(ARRAY_SIZE(sizes) != NCSI_FILTER_MAX); + if (table < NCSI_FILTER_BASE || table >= NCSI_FILTER_MAX) + return -EINVAL; + + return sizes[table]; +} + +int ncsi_find_filter(struct ncsi_channel *nc, int table, void *data) +{ + struct ncsi_channel_filter *ncf; + void *bitmap; + int index, size; + unsigned long flags; + + ncf = nc->filters[table]; + if (!ncf) + return -ENXIO; + + size = ncsi_filter_size(table); + if (size < 0) + return size; + + spin_lock_irqsave(&nc->lock, flags); + bitmap = (void *)&ncf->bitmap; + index = -1; + while ((index = find_next_bit(bitmap, ncf->total, index + 1)) + < ncf->total) { + if (!memcmp(ncf->data + size * index, data, size)) { + spin_unlock_irqrestore(&nc->lock, flags); + return index; + } + } + spin_unlock_irqrestore(&nc->lock, flags); + + return -ENOENT; +} + +int ncsi_add_filter(struct ncsi_channel *nc, int table, void *data) +{ + struct ncsi_channel_filter *ncf; + int index, size; + void *bitmap; + unsigned long flags; + + size = ncsi_filter_size(table); + if (size < 0) + return size; + + index = ncsi_find_filter(nc, table, data); + if (index >= 0) + return index; + + ncf = nc->filters[table]; + if (!ncf) + return -ENODEV; + + spin_lock_irqsave(&nc->lock, flags); + bitmap = (void *)&ncf->bitmap; + do { + index = find_next_zero_bit(bitmap, ncf->total, 0); + if (index >= ncf->total) { + spin_unlock_irqrestore(&nc->lock, flags); + return -ENOSPC; + } + } while (test_and_set_bit(index, bitmap)); + + 
memcpy(ncf->data + size * index, data, size); + spin_unlock_irqrestore(&nc->lock, flags); + + return index; +} + +int ncsi_remove_filter(struct ncsi_channel *nc, int table, int index) +{ + struct ncsi_channel_filter *ncf; + int size; + void *bitmap; + unsigned long flags; + + size = ncsi_filter_size(table); + if (size < 0) + return size; + + ncf = nc->filters[table]; + if (!ncf || index >= ncf->total) + return -ENODEV; + + spin_lock_irqsave(&nc->lock, flags); + bitmap = (void *)&ncf->bitmap; + if (test_and_clear_bit(index, bitmap)) + memset(ncf->data + size * index, 0, size); + spin_unlock_irqrestore(&nc->lock, flags); + + return 0; +} + +static void ncsi_report_link(struct ncsi_dev_priv *ndp, bool force_down) +{ + struct ncsi_dev *nd = &ndp->ndev; + struct ncsi_package *np; + struct ncsi_channel *nc; + + nd->state = ncsi_dev_state_functional; + if (force_down) { + nd->link_up = 0; + goto report; + } + + nd->link_up = 0; + NCSI_FOR_EACH_PACKAGE(ndp, np) { + NCSI_FOR_EACH_CHANNEL(np, nc) { + if (!list_empty(&nc->link) || + nc->state != NCSI_CHANNEL_ACTIVE) + continue; + + if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) { + nd->link_up = 1; + goto report; + } + } + } + +report: + nd->handler(nd); +} + +static void ncsi_channel_monitor(unsigned long data) +{ + struct ncsi_channel *nc = (struct ncsi_channel *)data; + struct ncsi_package *np = nc->package; + struct ncsi_dev_priv *ndp = np->ndp; + struct ncsi_cmd_arg nca; + bool enabled; + unsigned int timeout; + unsigned long flags; + int ret; + + spin_lock_irqsave(&nc->lock, flags); + timeout = nc->timeout; + enabled = nc->enabled; + spin_unlock_irqrestore(&nc->lock, flags); + + if (!enabled || !list_empty(&nc->link)) + return; + if (nc->state != NCSI_CHANNEL_INACTIVE && + nc->state != NCSI_CHANNEL_ACTIVE) + return; + + if (!(timeout % 2)) { + nca.ndp = ndp; + nca.package = np->id; + nca.channel = nc->id; + nca.type = NCSI_PKT_CMD_GLS; + nca.driven = false; + ret = ncsi_xmit_cmd(&nca); + if (ret) { + 
netdev_err(ndp->ndev.dev, "Error %d sending GLS\n", + ret); + return; + } + } + + if (timeout + 1 >= 3) { + if (!(ndp->flags & NCSI_DEV_HWA) && + nc->state == NCSI_CHANNEL_ACTIVE) + ncsi_report_link(ndp, true); + + spin_lock_irqsave(&ndp->lock, flags); + xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + list_add_tail_rcu(&nc->link, &ndp->channel_queue); + spin_unlock_irqrestore(&ndp->lock, flags); + ncsi_process_next_channel(ndp); + return; + } + + spin_lock_irqsave(&nc->lock, flags); + nc->timeout = timeout + 1; + nc->enabled = true; + spin_unlock_irqrestore(&nc->lock, flags); + mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2))); +} + +void ncsi_start_channel_monitor(struct ncsi_channel *nc) +{ + unsigned long flags; + + spin_lock_irqsave(&nc->lock, flags); + WARN_ON_ONCE(nc->enabled); + nc->timeout = 0; + nc->enabled = true; + spin_unlock_irqrestore(&nc->lock, flags); + + mod_timer(&nc->timer, jiffies + HZ * (1 << (nc->timeout / 2))); +} + +void ncsi_stop_channel_monitor(struct ncsi_channel *nc) +{ + unsigned long flags; + + spin_lock_irqsave(&nc->lock, flags); + if (!nc->enabled) { + spin_unlock_irqrestore(&nc->lock, flags); + return; + } + nc->enabled = false; + spin_unlock_irqrestore(&nc->lock, flags); + + del_timer_sync(&nc->timer); +} + +struct ncsi_channel *ncsi_find_channel(struct ncsi_package *np, + unsigned char id) +{ + struct ncsi_channel *nc; + + NCSI_FOR_EACH_CHANNEL(np, nc) { + if (nc->id == id) + return nc; + } + + return NULL; +} + +struct ncsi_channel *ncsi_add_channel(struct ncsi_package *np, unsigned char id) +{ + struct ncsi_channel *nc, *tmp; + int index; + unsigned long flags; + + nc = kzalloc(sizeof(*nc), GFP_ATOMIC); + if (!nc) + return NULL; + + nc->id = id; + nc->package = np; + nc->state = NCSI_CHANNEL_INACTIVE; + nc->enabled = false; + setup_timer(&nc->timer, ncsi_channel_monitor, (unsigned long)nc); + spin_lock_init(&nc->lock); + INIT_LIST_HEAD(&nc->link); + for (index = 0; index < NCSI_CAP_MAX; index++) + nc->caps[index].index = 
index; + for (index = 0; index < NCSI_MODE_MAX; index++) + nc->modes[index].index = index; + + spin_lock_irqsave(&np->lock, flags); + tmp = ncsi_find_channel(np, id); + if (tmp) { + spin_unlock_irqrestore(&np->lock, flags); + kfree(nc); + return tmp; + } + + list_add_tail_rcu(&nc->node, &np->channels); + np->channel_num++; + spin_unlock_irqrestore(&np->lock, flags); + + return nc; +} + +static void ncsi_remove_channel(struct ncsi_channel *nc) +{ + struct ncsi_package *np = nc->package; + struct ncsi_channel_filter *ncf; + unsigned long flags; + int i; + + /* Release filters */ + spin_lock_irqsave(&nc->lock, flags); + for (i = 0; i < NCSI_FILTER_MAX; i++) { + ncf = nc->filters[i]; + if (!ncf) + continue; + + nc->filters[i] = NULL; + kfree(ncf); + } + + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); + ncsi_stop_channel_monitor(nc); + + /* Remove and free channel */ + spin_lock_irqsave(&np->lock, flags); + list_del_rcu(&nc->node); + np->channel_num--; + spin_unlock_irqrestore(&np->lock, flags); + + kfree(nc); +} + +struct ncsi_package *ncsi_find_package(struct ncsi_dev_priv *ndp, + unsigned char id) +{ + struct ncsi_package *np; + + NCSI_FOR_EACH_PACKAGE(ndp, np) { + if (np->id == id) + return np; + } + + return NULL; +} + +struct ncsi_package *ncsi_add_package(struct ncsi_dev_priv *ndp, + unsigned char id) +{ + struct ncsi_package *np, *tmp; + unsigned long flags; + + np = kzalloc(sizeof(*np), GFP_ATOMIC); + if (!np) + return NULL; + + np->id = id; + np->ndp = ndp; + spin_lock_init(&np->lock); + INIT_LIST_HEAD(&np->channels); + + spin_lock_irqsave(&ndp->lock, flags); + tmp = ncsi_find_package(ndp, id); + if (tmp) { + spin_unlock_irqrestore(&ndp->lock, flags); + kfree(np); + return tmp; + } + + list_add_tail_rcu(&np->node, &ndp->packages); + ndp->package_num++; + spin_unlock_irqrestore(&ndp->lock, flags); + + return np; +} + +void ncsi_remove_package(struct ncsi_package *np) +{ + struct ncsi_dev_priv *ndp = np->ndp; + struct 
ncsi_channel *nc, *tmp; + unsigned long flags; + + /* Release all child channels */ + list_for_each_entry_safe(nc, tmp, &np->channels, node) + ncsi_remove_channel(nc); + + /* Remove and free package */ + spin_lock_irqsave(&ndp->lock, flags); + list_del_rcu(&np->node); + ndp->package_num--; + spin_unlock_irqrestore(&ndp->lock, flags); + + kfree(np); +} + +void ncsi_find_package_and_channel(struct ncsi_dev_priv *ndp, + unsigned char id, + struct ncsi_package **np, + struct ncsi_channel **nc) +{ + struct ncsi_package *p; + struct ncsi_channel *c; + + p = ncsi_find_package(ndp, NCSI_PACKAGE_INDEX(id)); + c = p ? ncsi_find_channel(p, NCSI_CHANNEL_INDEX(id)) : NULL; + + if (np) + *np = p; + if (nc) + *nc = c; +} + +/* For two consecutive NCSI commands, the packet IDs shouldn't + * be same. Otherwise, the bogus response might be replied. So + * the available IDs are allocated in round-robin fashion. + */ +struct ncsi_request *ncsi_alloc_request(struct ncsi_dev_priv *ndp, bool driven) +{ + struct ncsi_request *nr = NULL; + int i, limit = ARRAY_SIZE(ndp->requests); + unsigned long flags; + + /* Check if there is one available request until the ceiling */ + spin_lock_irqsave(&ndp->lock, flags); + for (i = ndp->request_id; !nr && i < limit; i++) { + if (ndp->requests[i].used) + continue; + + nr = &ndp->requests[i]; + nr->used = true; + nr->driven = driven; + if (++ndp->request_id >= limit) + ndp->request_id = 0; + } + + /* Fail back to check from the starting cursor */ + for (i = 0; !nr && i < ndp->request_id; i++) { + if (ndp->requests[i].used) + continue; + + nr = &ndp->requests[i]; + nr->used = true; + nr->driven = driven; + if (++ndp->request_id >= limit) + ndp->request_id = 0; + } + spin_unlock_irqrestore(&ndp->lock, flags); + + return nr; +} + +void ncsi_free_request(struct ncsi_request *nr) +{ + struct ncsi_dev_priv *ndp = nr->ndp; + struct sk_buff *cmd, *rsp; + unsigned long flags; + bool driven; + + if (nr->enabled) { + nr->enabled = false; + 
del_timer_sync(&nr->timer); + } + + spin_lock_irqsave(&ndp->lock, flags); + cmd = nr->cmd; + rsp = nr->rsp; + nr->cmd = NULL; + nr->rsp = NULL; + nr->used = false; + driven = nr->driven; + spin_unlock_irqrestore(&ndp->lock, flags); + + if (driven && cmd && --ndp->pending_req_num == 0) + schedule_work(&ndp->work); + + /* Release command and response */ + consume_skb(cmd); + consume_skb(rsp); +} + +struct ncsi_dev *ncsi_find_dev(struct net_device *dev) +{ + struct ncsi_dev_priv *ndp; + + NCSI_FOR_EACH_DEV(ndp) { + if (ndp->ndev.dev == dev) + return &ndp->ndev; + } + + return NULL; +} + +static void ncsi_request_timeout(unsigned long data) +{ + struct ncsi_request *nr = (struct ncsi_request *)data; + struct ncsi_dev_priv *ndp = nr->ndp; + unsigned long flags; + + /* If the request already had associated response, + * let the response handler to release it. + */ + spin_lock_irqsave(&ndp->lock, flags); + nr->enabled = false; + if (nr->rsp || !nr->cmd) { + spin_unlock_irqrestore(&ndp->lock, flags); + return; + } + spin_unlock_irqrestore(&ndp->lock, flags); + + /* Release the request */ + ncsi_free_request(nr); +} + +static void ncsi_suspend_channel(struct ncsi_dev_priv *ndp) +{ + struct ncsi_dev *nd = &ndp->ndev; + struct ncsi_package *np = ndp->active_package; + struct ncsi_channel *nc = ndp->active_channel; + struct ncsi_cmd_arg nca; + int ret; + + nca.ndp = ndp; + nca.driven = true; + switch (nd->state) { + case ncsi_dev_state_suspend: + nd->state = ncsi_dev_state_suspend_select; + /* Fall through */ + case ncsi_dev_state_suspend_select: + case ncsi_dev_state_suspend_dcnt: + case ncsi_dev_state_suspend_dc: + case ncsi_dev_state_suspend_deselect: + ndp->pending_req_num = 1; + + np = ndp->active_package; + nc = ndp->active_channel; + nca.package = np->id; + if (nd->state == ncsi_dev_state_suspend_select) { + nca.type = NCSI_PKT_CMD_SP; + nca.channel = 0x1f; + if (ndp->flags & NCSI_DEV_HWA) + nca.bytes[0] = 0; + else + nca.bytes[0] = 1; + nd->state = 
ncsi_dev_state_suspend_dcnt; + } else if (nd->state == ncsi_dev_state_suspend_dcnt) { + nca.type = NCSI_PKT_CMD_DCNT; + nca.channel = nc->id; + nd->state = ncsi_dev_state_suspend_dc; + } else if (nd->state == ncsi_dev_state_suspend_dc) { + nca.type = NCSI_PKT_CMD_DC; + nca.channel = nc->id; + nca.bytes[0] = 1; + nd->state = ncsi_dev_state_suspend_deselect; + } else if (nd->state == ncsi_dev_state_suspend_deselect) { + nca.type = NCSI_PKT_CMD_DP; + nca.channel = 0x1f; + nd->state = ncsi_dev_state_suspend_done; + } + + ret = ncsi_xmit_cmd(&nca); + if (ret) { + nd->state = ncsi_dev_state_functional; + return; + } + + break; + case ncsi_dev_state_suspend_done: + xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + ncsi_process_next_channel(ndp); + + break; + default: + netdev_warn(nd->dev, "Wrong NCSI state 0x%x in suspend\n", + nd->state); + } +} + +static void ncsi_configure_channel(struct ncsi_dev_priv *ndp) +{ + struct ncsi_dev *nd = &ndp->ndev; + struct net_device *dev = nd->dev; + struct ncsi_package *np = ndp->active_package; + struct ncsi_channel *nc = ndp->active_channel; + struct ncsi_cmd_arg nca; + unsigned char index; + int ret; + + nca.ndp = ndp; + nca.driven = true; + switch (nd->state) { + case ncsi_dev_state_config: + case ncsi_dev_state_config_sp: + ndp->pending_req_num = 1; + + /* Select the specific package */ + nca.type = NCSI_PKT_CMD_SP; + if (ndp->flags & NCSI_DEV_HWA) + nca.bytes[0] = 0; + else + nca.bytes[0] = 1; + nca.package = np->id; + nca.channel = 0x1f; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + + nd->state = ncsi_dev_state_config_cis; + break; + case ncsi_dev_state_config_cis: + ndp->pending_req_num = 1; + + /* Clear initial state */ + nca.type = NCSI_PKT_CMD_CIS; + nca.package = np->id; + nca.channel = nc->id; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + + nd->state = ncsi_dev_state_config_sma; + break; + case ncsi_dev_state_config_sma: + case ncsi_dev_state_config_ebf: +#if IS_ENABLED(CONFIG_IPV6) + case 
ncsi_dev_state_config_egmf: +#endif + case ncsi_dev_state_config_ecnt: + case ncsi_dev_state_config_ec: + case ncsi_dev_state_config_ae: + case ncsi_dev_state_config_gls: + ndp->pending_req_num = 1; + + nca.package = np->id; + nca.channel = nc->id; + + /* Use first entry in unicast filter table. Note that + * the MAC filter table starts from entry 1 instead of + * 0. + */ + if (nd->state == ncsi_dev_state_config_sma) { + nca.type = NCSI_PKT_CMD_SMA; + for (index = 0; index < 6; index++) + nca.bytes[index] = dev->dev_addr[index]; + nca.bytes[6] = 0x1; + nca.bytes[7] = 0x1; + nd->state = ncsi_dev_state_config_ebf; + } else if (nd->state == ncsi_dev_state_config_ebf) { + nca.type = NCSI_PKT_CMD_EBF; + nca.dwords[0] = nc->caps[NCSI_CAP_BC].cap; + nd->state = ncsi_dev_state_config_ecnt; +#if IS_ENABLED(CONFIG_IPV6) + if (ndp->inet6_addr_num > 0 && + (nc->caps[NCSI_CAP_GENERIC].cap & + NCSI_CAP_GENERIC_MC)) + nd->state = ncsi_dev_state_config_egmf; + else + nd->state = ncsi_dev_state_config_ecnt; + } else if (nd->state == ncsi_dev_state_config_egmf) { + nca.type = NCSI_PKT_CMD_EGMF; + nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap; + nd->state = ncsi_dev_state_config_ecnt; +#endif /* CONFIG_IPV6 */ + } else if (nd->state == ncsi_dev_state_config_ecnt) { + nca.type = NCSI_PKT_CMD_ECNT; + nd->state = ncsi_dev_state_config_ec; + } else if (nd->state == ncsi_dev_state_config_ec) { + /* Enable AEN if it's supported */ + nca.type = NCSI_PKT_CMD_EC; + nd->state = ncsi_dev_state_config_ae; + if (!(nc->caps[NCSI_CAP_AEN].cap & NCSI_CAP_AEN_MASK)) + nd->state = ncsi_dev_state_config_gls; + } else if (nd->state == ncsi_dev_state_config_ae) { + nca.type = NCSI_PKT_CMD_AE; + nca.bytes[0] = 0; + nca.dwords[1] = nc->caps[NCSI_CAP_AEN].cap; + nd->state = ncsi_dev_state_config_gls; + } else if (nd->state == ncsi_dev_state_config_gls) { + nca.type = NCSI_PKT_CMD_GLS; + nd->state = ncsi_dev_state_config_done; + } + + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + break; + case 
ncsi_dev_state_config_done: + if (nc->modes[NCSI_MODE_LINK].data[2] & 0x1) + xchg(&nc->state, NCSI_CHANNEL_ACTIVE); + else + xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + + ncsi_start_channel_monitor(nc); + ncsi_process_next_channel(ndp); + break; + default: + netdev_warn(dev, "Wrong NCSI state 0x%x in config\n", + nd->state); + } + + return; + +error: + ncsi_report_link(ndp, true); +} + +static int ncsi_choose_active_channel(struct ncsi_dev_priv *ndp) +{ + struct ncsi_package *np; + struct ncsi_channel *nc, *found; + struct ncsi_channel_mode *ncm; + unsigned long flags; + + /* The search is done once an inactive channel with up + * link is found. + */ + found = NULL; + NCSI_FOR_EACH_PACKAGE(ndp, np) { + NCSI_FOR_EACH_CHANNEL(np, nc) { + if (!list_empty(&nc->link) || + nc->state != NCSI_CHANNEL_INACTIVE) + continue; + + if (!found) + found = nc; + + ncm = &nc->modes[NCSI_MODE_LINK]; + if (ncm->data[2] & 0x1) { + found = nc; + goto out; + } + } + } + + if (!found) { + ncsi_report_link(ndp, true); + return -ENODEV; + } + +out: + spin_lock_irqsave(&ndp->lock, flags); + list_add_tail_rcu(&found->link, &ndp->channel_queue); + spin_unlock_irqrestore(&ndp->lock, flags); + + return ncsi_process_next_channel(ndp); +} + +static bool ncsi_check_hwa(struct ncsi_dev_priv *ndp) +{ + struct ncsi_package *np; + struct ncsi_channel *nc; + unsigned int cap; + + /* The hardware arbitration is disabled if any one channel + * doesn't support explicitly. 
+ */ + NCSI_FOR_EACH_PACKAGE(ndp, np) { + NCSI_FOR_EACH_CHANNEL(np, nc) { + cap = nc->caps[NCSI_CAP_GENERIC].cap; + if (!(cap & NCSI_CAP_GENERIC_HWA) || + (cap & NCSI_CAP_GENERIC_HWA_MASK) != + NCSI_CAP_GENERIC_HWA_SUPPORT) { + ndp->flags &= ~NCSI_DEV_HWA; + return false; + } + } + } + + ndp->flags |= NCSI_DEV_HWA; + return true; +} + +static int ncsi_enable_hwa(struct ncsi_dev_priv *ndp) +{ + struct ncsi_package *np; + struct ncsi_channel *nc; + unsigned long flags; + + /* Move all available channels to processing queue */ + spin_lock_irqsave(&ndp->lock, flags); + NCSI_FOR_EACH_PACKAGE(ndp, np) { + NCSI_FOR_EACH_CHANNEL(np, nc) { + WARN_ON_ONCE(nc->state != NCSI_CHANNEL_INACTIVE || + !list_empty(&nc->link)); + ncsi_stop_channel_monitor(nc); + list_add_tail_rcu(&nc->link, &ndp->channel_queue); + } + } + spin_unlock_irqrestore(&ndp->lock, flags); + + /* We can have no channels in extremely case */ + if (list_empty(&ndp->channel_queue)) { + ncsi_report_link(ndp, false); + return -ENOENT; + } + + return ncsi_process_next_channel(ndp); +} + +static void ncsi_probe_channel(struct ncsi_dev_priv *ndp) +{ + struct ncsi_dev *nd = &ndp->ndev; + struct ncsi_package *np; + struct ncsi_channel *nc; + struct ncsi_cmd_arg nca; + unsigned char index; + int ret; + + nca.ndp = ndp; + nca.driven = true; + switch (nd->state) { + case ncsi_dev_state_probe: + nd->state = ncsi_dev_state_probe_deselect; + /* Fall through */ + case ncsi_dev_state_probe_deselect: + ndp->pending_req_num = 8; + + /* Deselect all possible packages */ + nca.type = NCSI_PKT_CMD_DP; + nca.channel = 0x1f; + for (index = 0; index < 8; index++) { + nca.package = index; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + } + + nd->state = ncsi_dev_state_probe_package; + break; + case ncsi_dev_state_probe_package: + ndp->pending_req_num = 16; + + /* Select all possible packages */ + nca.type = NCSI_PKT_CMD_SP; + nca.bytes[0] = 1; + nca.channel = 0x1f; + for (index = 0; index < 8; index++) { + nca.package = index; 
+ ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + } + + /* Disable all possible packages */ + nca.type = NCSI_PKT_CMD_DP; + for (index = 0; index < 8; index++) { + nca.package = index; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + } + + nd->state = ncsi_dev_state_probe_channel; + break; + case ncsi_dev_state_probe_channel: + if (!ndp->active_package) + ndp->active_package = list_first_or_null_rcu( + &ndp->packages, struct ncsi_package, node); + else if (list_is_last(&ndp->active_package->node, + &ndp->packages)) + ndp->active_package = NULL; + else + ndp->active_package = list_next_entry( + ndp->active_package, node); + + /* All available packages and channels are enumerated. The + * enumeration happens for once when the NCSI interface is + * started. So we need continue to start the interface after + * the enumeration. + * + * We have to choose an active channel before configuring it. + * Note that we possibly don't have active channel in extreme + * situation. + */ + if (!ndp->active_package) { + ndp->flags |= NCSI_DEV_PROBED; + if (ncsi_check_hwa(ndp)) + ncsi_enable_hwa(ndp); + else + ncsi_choose_active_channel(ndp); + return; + } + + /* Select the active package */ + ndp->pending_req_num = 1; + nca.type = NCSI_PKT_CMD_SP; + nca.bytes[0] = 1; + nca.package = ndp->active_package->id; + nca.channel = 0x1f; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + + nd->state = ncsi_dev_state_probe_cis; + break; + case ncsi_dev_state_probe_cis: + ndp->pending_req_num = 32; + + /* Clear initial state */ + nca.type = NCSI_PKT_CMD_CIS; + nca.package = ndp->active_package->id; + for (index = 0; index < 0x20; index++) { + nca.channel = index; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + } + + nd->state = ncsi_dev_state_probe_gvi; + break; + case ncsi_dev_state_probe_gvi: + case ncsi_dev_state_probe_gc: + case ncsi_dev_state_probe_gls: + np = ndp->active_package; + ndp->pending_req_num = np->channel_num; + + /* Retrieve version, capability or 
link status */ + if (nd->state == ncsi_dev_state_probe_gvi) + nca.type = NCSI_PKT_CMD_GVI; + else if (nd->state == ncsi_dev_state_probe_gc) + nca.type = NCSI_PKT_CMD_GC; + else + nca.type = NCSI_PKT_CMD_GLS; + + nca.package = np->id; + NCSI_FOR_EACH_CHANNEL(np, nc) { + nca.channel = nc->id; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + } + + if (nd->state == ncsi_dev_state_probe_gvi) + nd->state = ncsi_dev_state_probe_gc; + else if (nd->state == ncsi_dev_state_probe_gc) + nd->state = ncsi_dev_state_probe_gls; + else + nd->state = ncsi_dev_state_probe_dp; + break; + case ncsi_dev_state_probe_dp: + ndp->pending_req_num = 1; + + /* Deselect the active package */ + nca.type = NCSI_PKT_CMD_DP; + nca.package = ndp->active_package->id; + nca.channel = 0x1f; + ret = ncsi_xmit_cmd(&nca); + if (ret) + goto error; + + /* Scan channels in next package */ + nd->state = ncsi_dev_state_probe_channel; + break; + default: + netdev_warn(nd->dev, "Wrong NCSI state 0x%0x in enumeration\n", + nd->state); + } + + return; +error: + ncsi_report_link(ndp, true); +} + +static void ncsi_dev_work(struct work_struct *work) +{ + struct ncsi_dev_priv *ndp = container_of(work, + struct ncsi_dev_priv, work); + struct ncsi_dev *nd = &ndp->ndev; + + switch (nd->state & ncsi_dev_state_major) { + case ncsi_dev_state_probe: + ncsi_probe_channel(ndp); + break; + case ncsi_dev_state_suspend: + ncsi_suspend_channel(ndp); + break; + case ncsi_dev_state_config: + ncsi_configure_channel(ndp); + break; + default: + netdev_warn(nd->dev, "Wrong NCSI state 0x%x in workqueue\n", + nd->state); + } +} + +int ncsi_process_next_channel(struct ncsi_dev_priv *ndp) +{ + struct ncsi_channel *nc; + int old_state; + unsigned long flags; + + spin_lock_irqsave(&ndp->lock, flags); + nc = list_first_or_null_rcu(&ndp->channel_queue, + struct ncsi_channel, link); + if (nc) { + old_state = xchg(&nc->state, NCSI_CHANNEL_INVISIBLE); + list_del_init(&nc->link); + } + spin_unlock_irqrestore(&ndp->lock, flags); + + 
ndp->active_channel = nc; + ndp->active_package = nc ? nc->package : NULL; + if (!nc) { + if (ndp->flags & NCSI_DEV_RESHUFFLE) { + ndp->flags &= ~NCSI_DEV_RESHUFFLE; + return ncsi_choose_active_channel(ndp); + } + + ncsi_report_link(ndp, false); + return -ENODEV; + } + + switch (old_state) { + case NCSI_CHANNEL_INACTIVE: + ndp->ndev.state = ncsi_dev_state_config; + ncsi_configure_channel(ndp); + break; + case NCSI_CHANNEL_ACTIVE: + ndp->ndev.state = ncsi_dev_state_suspend; + ncsi_suspend_channel(ndp); + break; + default: + netdev_err(ndp->ndev.dev, "Invalid state 0x%x on %d:%d\n", + nc->state, nc->package->id, nc->id); + ncsi_report_link(ndp, false); + return -EINVAL; + } + + return 0; +} + +#if IS_ENABLED(CONFIG_IPV6) +static int ncsi_inet6addr_event(struct notifier_block *this, + unsigned long event, void *data) +{ + struct inet6_ifaddr *ifa = data; + struct net_device *dev = ifa->idev->dev; + struct ncsi_dev *nd = ncsi_find_dev(dev); + struct ncsi_dev_priv *ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL; + struct ncsi_package *np; + struct ncsi_channel *nc; + struct ncsi_cmd_arg nca; + bool action; + int ret; + + if (!ndp || (ipv6_addr_type(&ifa->addr) & + (IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK))) + return NOTIFY_OK; + + switch (event) { + case NETDEV_UP: + action = (++ndp->inet6_addr_num) == 1; + nca.type = NCSI_PKT_CMD_EGMF; + break; + case NETDEV_DOWN: + action = (--ndp->inet6_addr_num == 0); + nca.type = NCSI_PKT_CMD_DGMF; + break; + default: + return NOTIFY_OK; + } + + /* We might not have active channel or packages. The IPv6 + * required multicast will be enabled when active channel + * or packages are chosen. 
+ */ + np = ndp->active_package; + nc = ndp->active_channel; + if (!action || !np || !nc) + return NOTIFY_OK; + + /* We needn't enable or disable it if the function isn't supported */ + if (!(nc->caps[NCSI_CAP_GENERIC].cap & NCSI_CAP_GENERIC_MC)) + return NOTIFY_OK; + /* Send the enable/disable command chosen above to the active channel, passing the channel's multicast capability word as the first dword */ + nca.ndp = ndp; + nca.driven = false; + nca.package = np->id; + nca.channel = nc->id; + nca.dwords[0] = nc->caps[NCSI_CAP_MC].cap; + ret = ncsi_xmit_cmd(&nca); + if (ret) { + netdev_warn(dev, "Fail to %s global multicast filter (%d)\n", + (event == NETDEV_UP) ? "enable" : "disable", ret); + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +static struct notifier_block ncsi_inet6addr_notifier = { + .notifier_call = ncsi_inet6addr_event, +}; +#endif /* CONFIG_IPV6 */ + /* Register an NCSI device for @dev with @handler as its callback. Returns the existing ncsi_dev when @dev is already registered, NULL when the allocation fails, otherwise a new device in ncsi_dev_state_registered that has been added to ncsi_dev_list and has its packet receive handler installed. The inet6addr notifier is registered when the list was empty before this device was added. */ +struct ncsi_dev *ncsi_register_dev(struct net_device *dev, + void (*handler)(struct ncsi_dev *ndev)) +{ + struct ncsi_dev_priv *ndp; + struct ncsi_dev *nd; + unsigned long flags; + int i; + + /* Check if the device has been registered or not */ + nd = ncsi_find_dev(dev); + if (nd) + return nd; + + /* Create NCSI device */ + ndp = kzalloc(sizeof(*ndp), GFP_ATOMIC); + if (!ndp) + return NULL; + + nd = &ndp->ndev; + nd->state = ncsi_dev_state_registered; + nd->dev = dev; + nd->handler = handler; + ndp->pending_req_num = 0; + INIT_LIST_HEAD(&ndp->channel_queue); + INIT_WORK(&ndp->work, ncsi_dev_work); + + /* Initialize private NCSI device: every request slot gets its index as id, a back pointer to ndp and its own timeout timer */ + spin_lock_init(&ndp->lock); + INIT_LIST_HEAD(&ndp->packages); + ndp->request_id = 0; + for (i = 0; i < ARRAY_SIZE(ndp->requests); i++) { + ndp->requests[i].id = i; + ndp->requests[i].ndp = ndp; + setup_timer(&ndp->requests[i].timer, + ncsi_request_timeout, + (unsigned long)&ndp->requests[i]); + } + + spin_lock_irqsave(&ncsi_dev_lock, flags); +#if IS_ENABLED(CONFIG_IPV6) + ndp->inet6_addr_num = 0; + if (list_empty(&ncsi_dev_list)) + register_inet6addr_notifier(&ncsi_inet6addr_notifier); +#endif + list_add_tail_rcu(&ndp->node, &ncsi_dev_list); + spin_unlock_irqrestore(&ncsi_dev_lock, flags); + + /*
Register NCSI packet Rx handler */ + ndp->ptype.type = cpu_to_be16(ETH_P_NCSI); + ndp->ptype.func = ncsi_rcv_rsp; + ndp->ptype.dev = dev; + dev_add_pack(&ndp->ptype); + + return nd; +} +EXPORT_SYMBOL_GPL(ncsi_register_dev); + /* Start NCSI on @nd. Returns -ENOTTY unless the device is in the registered or functional state. When the device has not been probed yet (NCSI_DEV_PROBED clear) the probe work is scheduled and 0 is returned. Otherwise every channel is reset to NCSI_CHANNEL_INACTIVE and the result of ncsi_enable_hwa() (when NCSI_DEV_HWA is set) or ncsi_choose_active_channel() is returned. */ +int ncsi_start_dev(struct ncsi_dev *nd) +{ + struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); + struct ncsi_package *np; + struct ncsi_channel *nc; + int old_state, ret; + + if (nd->state != ncsi_dev_state_registered && + nd->state != ncsi_dev_state_functional) + return -ENOTTY; + + if (!(ndp->flags & NCSI_DEV_PROBED)) { + nd->state = ncsi_dev_state_probe; + schedule_work(&ndp->work); + return 0; + } + + /* Reset channel's state and start over; a channel that is still queued or was invisible triggers a one-time warning */ + NCSI_FOR_EACH_PACKAGE(ndp, np) { + NCSI_FOR_EACH_CHANNEL(np, nc) { + old_state = xchg(&nc->state, NCSI_CHANNEL_INACTIVE); + WARN_ON_ONCE(!list_empty(&nc->link) || + old_state == NCSI_CHANNEL_INVISIBLE); + } + } + + if (ndp->flags & NCSI_DEV_HWA) + ret = ncsi_enable_hwa(ndp); + else + ret = ncsi_choose_active_channel(ndp); + + return ret; +} +EXPORT_SYMBOL_GPL(ncsi_start_dev); + /* Tear down @nd: remove the packet receive handler, remove every package, unlink the device from ncsi_dev_list (unregistering the inet6addr notifier once the list is empty) and free the private structure. */ +void ncsi_unregister_dev(struct ncsi_dev *nd) +{ + struct ncsi_dev_priv *ndp = TO_NCSI_DEV_PRIV(nd); + struct ncsi_package *np, *tmp; + unsigned long flags; + + dev_remove_pack(&ndp->ptype); + + list_for_each_entry_safe(np, tmp, &ndp->packages, node) + ncsi_remove_package(np); + + spin_lock_irqsave(&ncsi_dev_lock, flags); + list_del_rcu(&ndp->node); +#if IS_ENABLED(CONFIG_IPV6) + if (list_empty(&ncsi_dev_list)) + unregister_inet6addr_notifier(&ncsi_inet6addr_notifier); +#endif + spin_unlock_irqrestore(&ncsi_dev_lock, flags); + + kfree(ndp); +} +EXPORT_SYMBOL_GPL(ncsi_unregister_dev); diff --git a/net/ncsi/ncsi-pkt.h b/net/ncsi/ncsi-pkt.h new file mode 100644 index 0000000..3ea49ed --- /dev/null +++ b/net/ncsi/ncsi-pkt.h @@ -0,0 +1,415 @@ +/* + * Copyright Gavin Shan, IBM Corporation 2016.
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __NCSI_PKT_H__ +#define __NCSI_PKT_H__ + /* Header shared by every NCSI packet; it is embedded as .common in the command, response and AEN headers that follow. Multi-byte fields are big-endian. */ +struct ncsi_pkt_hdr { + unsigned char mc_id; /* Management controller ID */ + unsigned char revision; /* NCSI version - 0x01 */ + unsigned char reserved; /* Reserved */ + unsigned char id; /* Packet sequence number */ + unsigned char type; /* Packet type */ + unsigned char channel; /* Network controller ID */ + __be16 length; /* Payload length */ + __be32 reserved1[2]; /* Reserved */ +}; + /* Command header: the common header with nothing added */ +struct ncsi_cmd_pkt_hdr { + struct ncsi_pkt_hdr common; /* Common NCSI packet header */ +}; + /* Response header: the common header followed by response code and reason */ +struct ncsi_rsp_pkt_hdr { + struct ncsi_pkt_hdr common; /* Common NCSI packet header */ + __be16 code; /* Response code */ + __be16 reason; /* Response reason */ +}; + /* AEN header: the common header followed by the AEN packet type */ +struct ncsi_aen_pkt_hdr { + struct ncsi_pkt_hdr common; /* Common NCSI packet header */ + unsigned char reserved2[3]; /* Reserved */ + unsigned char type; /* AEN packet type */ +}; + +/* NCSI common command packet */ +struct ncsi_cmd_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 checksum; /* Checksum */ + unsigned char pad[26]; +}; + /* NCSI common response packet */ +struct ncsi_rsp_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Select Package */ +struct ncsi_cmd_sp_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char hw_arbitration; /* HW arbitration */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Disable Channel */ +struct ncsi_cmd_dc_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char ald; /* Allow link down */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Reset
Channel */ +struct ncsi_cmd_rc_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 reserved; /* Reserved */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* AEN Enable */ +struct ncsi_cmd_ae_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char mc_id; /* MC ID */ + __be32 mode; /* AEN working mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[18]; +}; + +/* Set Link */ +struct ncsi_cmd_sl_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 mode; /* Link working mode */ + __be32 oem_mode; /* OEM link mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[18]; +}; + +/* Set VLAN Filter. NOTE(review): the members below sum to 42 bytes, while the members of every other command packet in this header sum to 46 - confirm whether pad[14] should be pad[18]. */ +struct ncsi_cmd_svf_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be16 reserved; /* Reserved */ + __be16 vlan; /* VLAN ID */ + __be16 reserved1; /* Reserved */ + unsigned char index; /* VLAN table index */ + unsigned char enable; /* Enable or disable */ + __be32 checksum; /* Checksum */ + unsigned char pad[14]; +}; + +/* Enable VLAN */ +struct ncsi_cmd_ev_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char mode; /* VLAN filter mode (single byte, no byte swapping needed) */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Set MAC Address */ +struct ncsi_cmd_sma_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char mac[6]; /* MAC address */ + unsigned char index; /* MAC table index */ + unsigned char at_e; /* Addr type and operation */ + __be32 checksum; /* Checksum */ + unsigned char pad[18]; +}; + +/* Enable Broadcast Filter */ +struct ncsi_cmd_ebf_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 mode; /* Filter mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Enable Global Multicast Filter */ +struct ncsi_cmd_egmf_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + __be32 mode; /* Global MC mode */ + __be32 checksum;
/* Checksum */ + unsigned char pad[22]; +}; + +/* Set NCSI Flow Control */ +struct ncsi_cmd_snfc_pkt { + struct ncsi_cmd_pkt_hdr cmd; /* Command header */ + unsigned char reserved[3]; /* Reserved */ + unsigned char mode; /* Flow control mode */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* Get Link Status response */ +struct ncsi_rsp_gls_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 status; /* Link status */ + __be32 other; /* Other indications */ + __be32 oem_status; /* OEM link status */ + __be32 checksum; /* Checksum */ + unsigned char pad[10]; +}; + +/* Get Version ID response */ +struct ncsi_rsp_gvi_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 ncsi_version; /* NCSI version */ + unsigned char reserved[3]; /* Reserved */ + unsigned char alpha2; /* NCSI version */ + unsigned char fw_name[12]; /* f/w name string */ + __be32 fw_version; /* f/w version */ + __be16 pci_ids[4]; /* PCI IDs */ + __be32 mf_id; /* Manufacturer ID */ + __be32 checksum; /* Checksum */ +}; + +/* Get Capabilities response */ +struct ncsi_rsp_gc_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 cap; /* Capabilities */ + __be32 bc_cap; /* Broadcast cap */ + __be32 mc_cap; /* Multicast cap */ + __be32 buf_cap; /* Buffering cap */ + __be32 aen_cap; /* AEN cap */ + unsigned char vlan_cnt; /* VLAN filter count */ + unsigned char mixed_cnt; /* Mix filter count */ + unsigned char mc_cnt; /* MC filter count */ + unsigned char uc_cnt; /* UC filter count */ + unsigned char reserved[2]; /* Reserved */ + unsigned char vlan_mode; /* VLAN mode */ + unsigned char channel_cnt; /* Channel count */ + __be32 checksum; /* Checksum */ +}; + +/* Get Parameters response */ +struct ncsi_rsp_gp_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + unsigned char mac_cnt; /* Number of MAC addr */ + unsigned char reserved[2]; /* Reserved */ + unsigned char mac_enable; /* MAC addr enable flags */ + unsigned char vlan_cnt; /* VLAN tag count */ + unsigned char reserved1; /* Reserved */ + __be16
vlan_enable; /* VLAN tag enable flags */ + __be32 link_mode; /* Link setting */ + __be32 bc_mode; /* BC filter mode */ + __be32 valid_modes; /* Valid mode parameters */ + unsigned char vlan_mode; /* VLAN mode */ + unsigned char fc_mode; /* Flow control mode */ + unsigned char reserved2[2]; /* Reserved */ + __be32 aen_mode; /* AEN mode */ + unsigned char mac[6]; /* Supported MAC addr */ + __be16 vlan; /* Supported VLAN tags */ + __be32 checksum; /* Checksum */ +}; + +/* Get Controller Packet Statistics response; every counter is a 32-bit big-endian value */ +struct ncsi_rsp_gcps_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 cnt_hi; /* Counter cleared */ + __be32 cnt_lo; /* Counter cleared */ + __be32 rx_bytes; /* Rx bytes */ + __be32 tx_bytes; /* Tx bytes */ + __be32 rx_uc_pkts; /* Rx UC packets */ + __be32 rx_mc_pkts; /* Rx MC packets */ + __be32 rx_bc_pkts; /* Rx BC packets */ + __be32 tx_uc_pkts; /* Tx UC packets */ + __be32 tx_mc_pkts; /* Tx MC packets */ + __be32 tx_bc_pkts; /* Tx BC packets */ + __be32 fcs_err; /* FCS errors */ + __be32 align_err; /* Alignment errors */ + __be32 false_carrier; /* False carrier detection */ + __be32 runt_pkts; /* Rx runt packets */ + __be32 jabber_pkts; /* Rx jabber packets */ + __be32 rx_pause_xon; /* Rx pause XON frames */ + __be32 rx_pause_xoff; /* Rx pause XOFF frames */ + __be32 tx_pause_xon; /* Tx pause XON frames */ + __be32 tx_pause_xoff; /* Tx pause XOFF frames */ + __be32 tx_s_collision; /* Single collision frames */ + __be32 tx_m_collision; /* Multiple collision frames */ + __be32 l_collision; /* Late collision frames */ + __be32 e_collision; /* Excessive collision frames */ + __be32 rx_ctl_frames; /* Rx control frames */ + __be32 rx_64_frames; /* Rx 64-bytes frames */ + __be32 rx_127_frames; /* Rx 65-127 bytes frames */ + __be32 rx_255_frames; /* Rx 128-255 bytes frames */ + __be32 rx_511_frames; /* Rx 256-511 bytes frames */ + __be32 rx_1023_frames; /* Rx 512-1023 bytes frames */ + __be32 rx_1522_frames; /* Rx 1024-1522 bytes frames */ + __be32 rx_9022_frames; /*
Rx 1523-9022 bytes frames */ + __be32 tx_64_frames; /* Tx 64-bytes frames */ + __be32 tx_127_frames; /* Tx 65-127 bytes frames */ + __be32 tx_255_frames; /* Tx 128-255 bytes frames */ + __be32 tx_511_frames; /* Tx 256-511 bytes frames */ + __be32 tx_1023_frames; /* Tx 512-1023 bytes frames */ + __be32 tx_1522_frames; /* Tx 1024-1522 bytes frames */ + __be32 tx_9022_frames; /* Tx 1523-9022 bytes frames */ + __be32 rx_valid_bytes; /* Rx valid bytes */ + __be32 rx_runt_pkts; /* Rx error runt packets */ + __be32 rx_jabber_pkts; /* Rx error jabber packets */ + __be32 checksum; /* Checksum */ +}; + +/* Get NCSI Statistics response; every counter is a 32-bit big-endian value */ +struct ncsi_rsp_gns_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 rx_cmds; /* Rx NCSI commands */ + __be32 dropped_cmds; /* Dropped commands */ + __be32 cmd_type_errs; /* Command type errors */ + __be32 cmd_csum_errs; /* Command checksum errors */ + __be32 rx_pkts; /* Rx NCSI packets */ + __be32 tx_pkts; /* Tx NCSI packets */ + __be32 tx_aen_pkts; /* Tx AEN packets */ + __be32 checksum; /* Checksum */ +}; + +/* Get NCSI Pass-through Statistics response; every counter is a 32-bit big-endian value */ +struct ncsi_rsp_gnpts_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 tx_pkts; /* Tx packets */ + __be32 tx_dropped; /* Tx dropped packets */ + __be32 tx_channel_err; /* Tx channel errors */ + __be32 tx_us_err; /* Tx undersize errors */ + __be32 rx_pkts; /* Rx packets */ + __be32 rx_dropped; /* Rx dropped packets */ + __be32 rx_channel_err; /* Rx channel errors */ + __be32 rx_us_err; /* Rx undersize errors */ + __be32 rx_os_err; /* Rx oversize errors */ + __be32 checksum; /* Checksum */ +}; + +/* Get Package Status response */ +struct ncsi_rsp_gps_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + __be32 status; /* Hardware arbitration status */ + __be32 checksum; /* Checksum */ +}; + +/* Get Package UUID response */ +struct ncsi_rsp_gpuuid_pkt { + struct ncsi_rsp_pkt_hdr rsp; /* Response header */ + unsigned char uuid[16]; /* UUID */ + __be32 checksum; /* Checksum */ +}; + +/* AEN: Link State Change
*/ +struct ncsi_aen_lsc_pkt { + struct ncsi_aen_pkt_hdr aen; /* AEN header */ + __be32 status; /* Link status */ + __be32 oem_status; /* OEM link status */ + __be32 checksum; /* Checksum */ + unsigned char pad[14]; +}; + +/* AEN: Configuration Required */ +struct ncsi_aen_cr_pkt { + struct ncsi_aen_pkt_hdr aen; /* AEN header */ + __be32 checksum; /* Checksum */ + unsigned char pad[22]; +}; + +/* AEN: Host Network Controller Driver Status Change */ +struct ncsi_aen_hncdsc_pkt { + struct ncsi_aen_pkt_hdr aen; /* AEN header */ + __be32 status; /* Status */ + __be32 checksum; /* Checksum */ + unsigned char pad[18]; +}; + +/* NCSI packet revision; the value the response validation compares the header's revision field against */ +#define NCSI_PKT_REVISION 0x01 + +/* NCSI packet commands (values for the type field of a command header) */ +#define NCSI_PKT_CMD_CIS 0x00 /* Clear Initial State */ +#define NCSI_PKT_CMD_SP 0x01 /* Select Package */ +#define NCSI_PKT_CMD_DP 0x02 /* Deselect Package */ +#define NCSI_PKT_CMD_EC 0x03 /* Enable Channel */ +#define NCSI_PKT_CMD_DC 0x04 /* Disable Channel */ +#define NCSI_PKT_CMD_RC 0x05 /* Reset Channel */ +#define NCSI_PKT_CMD_ECNT 0x06 /* Enable Channel Network Tx */ +#define NCSI_PKT_CMD_DCNT 0x07 /* Disable Channel Network Tx */ +#define NCSI_PKT_CMD_AE 0x08 /* AEN Enable */ +#define NCSI_PKT_CMD_SL 0x09 /* Set Link */ +#define NCSI_PKT_CMD_GLS 0x0a /* Get Link Status */ +#define NCSI_PKT_CMD_SVF 0x0b /* Set VLAN Filter */ +#define NCSI_PKT_CMD_EV 0x0c /* Enable VLAN */ +#define NCSI_PKT_CMD_DV 0x0d /* Disable VLAN */ +#define NCSI_PKT_CMD_SMA 0x0e /* Set MAC address */ +#define NCSI_PKT_CMD_EBF 0x10 /* Enable Broadcast Filter */ +#define NCSI_PKT_CMD_DBF 0x11 /* Disable Broadcast Filter */ +#define NCSI_PKT_CMD_EGMF 0x12 /* Enable Global Multicast Filter */ +#define NCSI_PKT_CMD_DGMF 0x13 /* Disable Global Multicast Filter */ +#define NCSI_PKT_CMD_SNFC 0x14 /* Set NCSI Flow Control */ +#define NCSI_PKT_CMD_GVI 0x15 /* Get Version ID */ +#define NCSI_PKT_CMD_GC 0x16 /* Get Capabilities */ +#define NCSI_PKT_CMD_GP 0x17 /* Get Parameters */ +#define
NCSI_PKT_CMD_GCPS 0x18 /* Get Controller Packet Statistics */ +#define NCSI_PKT_CMD_GNS 0x19 /* Get NCSI Statistics */ +#define NCSI_PKT_CMD_GNPTS 0x1a /* Get NCSI Pass-through Statistics */ +#define NCSI_PKT_CMD_GPS 0x1b /* Get package status */ +#define NCSI_PKT_CMD_OEM 0x50 /* OEM */ +#define NCSI_PKT_CMD_PLDM 0x51 /* PLDM request over NCSI over RBT */ +#define NCSI_PKT_CMD_GPUUID 0x52 /* Get package UUID */ + +/* NCSI packet responses: each response type is the matching command type plus 0x80 */ +#define NCSI_PKT_RSP_CIS (NCSI_PKT_CMD_CIS + 0x80) +#define NCSI_PKT_RSP_SP (NCSI_PKT_CMD_SP + 0x80) +#define NCSI_PKT_RSP_DP (NCSI_PKT_CMD_DP + 0x80) +#define NCSI_PKT_RSP_EC (NCSI_PKT_CMD_EC + 0x80) +#define NCSI_PKT_RSP_DC (NCSI_PKT_CMD_DC + 0x80) +#define NCSI_PKT_RSP_RC (NCSI_PKT_CMD_RC + 0x80) +#define NCSI_PKT_RSP_ECNT (NCSI_PKT_CMD_ECNT + 0x80) +#define NCSI_PKT_RSP_DCNT (NCSI_PKT_CMD_DCNT + 0x80) +#define NCSI_PKT_RSP_AE (NCSI_PKT_CMD_AE + 0x80) +#define NCSI_PKT_RSP_SL (NCSI_PKT_CMD_SL + 0x80) +#define NCSI_PKT_RSP_GLS (NCSI_PKT_CMD_GLS + 0x80) +#define NCSI_PKT_RSP_SVF (NCSI_PKT_CMD_SVF + 0x80) +#define NCSI_PKT_RSP_EV (NCSI_PKT_CMD_EV + 0x80) +#define NCSI_PKT_RSP_DV (NCSI_PKT_CMD_DV + 0x80) +#define NCSI_PKT_RSP_SMA (NCSI_PKT_CMD_SMA + 0x80) +#define NCSI_PKT_RSP_EBF (NCSI_PKT_CMD_EBF + 0x80) +#define NCSI_PKT_RSP_DBF (NCSI_PKT_CMD_DBF + 0x80) +#define NCSI_PKT_RSP_EGMF (NCSI_PKT_CMD_EGMF + 0x80) +#define NCSI_PKT_RSP_DGMF (NCSI_PKT_CMD_DGMF + 0x80) +#define NCSI_PKT_RSP_SNFC (NCSI_PKT_CMD_SNFC + 0x80) +#define NCSI_PKT_RSP_GVI (NCSI_PKT_CMD_GVI + 0x80) +#define NCSI_PKT_RSP_GC (NCSI_PKT_CMD_GC + 0x80) +#define NCSI_PKT_RSP_GP (NCSI_PKT_CMD_GP + 0x80) +#define NCSI_PKT_RSP_GCPS (NCSI_PKT_CMD_GCPS + 0x80) +#define NCSI_PKT_RSP_GNS (NCSI_PKT_CMD_GNS + 0x80) +#define NCSI_PKT_RSP_GNPTS (NCSI_PKT_CMD_GNPTS + 0x80) +#define NCSI_PKT_RSP_GPS (NCSI_PKT_CMD_GPS + 0x80) +#define NCSI_PKT_RSP_OEM (NCSI_PKT_CMD_OEM + 0x80) +#define NCSI_PKT_RSP_PLDM (NCSI_PKT_CMD_PLDM + 0x80) +#define NCSI_PKT_RSP_GPUUID (NCSI_PKT_CMD_GPUUID +
0x80) + +/* NCSI response code/reason: the _C_ values are response codes, the _R_ values are response reasons */ +#define NCSI_PKT_RSP_C_COMPLETED 0x0000 /* Command Completed */ +#define NCSI_PKT_RSP_C_FAILED 0x0001 /* Command Failed */ +#define NCSI_PKT_RSP_C_UNAVAILABLE 0x0002 /* Command Unavailable */ +#define NCSI_PKT_RSP_C_UNSUPPORTED 0x0003 /* Command Unsupported */ +#define NCSI_PKT_RSP_R_NO_ERROR 0x0000 /* No Error */ +#define NCSI_PKT_RSP_R_INTERFACE 0x0001 /* Interface not ready */ +#define NCSI_PKT_RSP_R_PARAM 0x0002 /* Invalid Parameter */ +#define NCSI_PKT_RSP_R_CHANNEL 0x0003 /* Channel not Ready */ +#define NCSI_PKT_RSP_R_PACKAGE 0x0004 /* Package not Ready */ +#define NCSI_PKT_RSP_R_LENGTH 0x0005 /* Invalid payload length */ +#define NCSI_PKT_RSP_R_UNKNOWN 0x7fff /* Command type unsupported */ + +/* NCSI AEN packet type */ +#define NCSI_PKT_AEN 0xFF /* AEN Packet */ +#define NCSI_PKT_AEN_LSC 0x00 /* Link status change */ +#define NCSI_PKT_AEN_CR 0x01 /* Configuration required */ +#define NCSI_PKT_AEN_HNCDSC 0x02 /* HNC driver status change */ + +#endif /* __NCSI_PKT_H__ */ diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c new file mode 100644 index 0000000..af84389 --- /dev/null +++ b/net/ncsi/ncsi-rsp.c @@ -0,0 +1,1035 @@ +/* + * Copyright Gavin Shan, IBM Corporation 2016. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#include <linux/module.h> +#include <linux/kernel.h> +#include <linux/init.h> +#include <linux/netdevice.h> +#include <linux/skbuff.h> + +#include <net/ncsi.h> +#include <net/net_namespace.h> +#include <net/sock.h> + +#include "internal.h" +#include "ncsi-pkt.h" + /* Validate the response attached to @nr. The header revision must be NCSI_PKT_REVISION, the header length field must equal @payload, and the response code/reason must be "completed"/"no error". The trailing 32-bit checksum is verified only when it is non-zero. Returns 0 when the packet is acceptable and -EINVAL otherwise. */ +static int ncsi_validate_rsp_pkt(struct ncsi_request *nr, + unsigned short payload) +{ + struct ncsi_rsp_pkt_hdr *h; + u32 checksum; + __be32 *pchecksum; + + /* Check NCSI packet header.
We don't need validate + * the packet type, which should have been checked + * before calling this function. + */ + h = (struct ncsi_rsp_pkt_hdr *)skb_network_header(nr->rsp); + if (h->common.revision != NCSI_PKT_REVISION) + return -EINVAL; + if (ntohs(h->common.length) != payload) + return -EINVAL; + + /* Check on code and reason */ + if (ntohs(h->code) != NCSI_PKT_RSP_C_COMPLETED || + ntohs(h->reason) != NCSI_PKT_RSP_R_NO_ERROR) + return -EINVAL; + + /* Validate checksum, which might be zeroes if the + * sender doesn't support checksum according to NCSI + * specification. The checksum is read from the last + * four bytes of the payload that follows the header. + */ + pchecksum = (__be32 *)((void *)(h + 1) + payload - 4); + if (ntohl(*pchecksum) == 0) + return 0; + + checksum = ncsi_calculate_checksum((unsigned char *)h, + sizeof(*h) + payload - 4); + if (*pchecksum != htonl(checksum)) + return -EINVAL; + + return 0; +} + /* Clear Initial State response: make sure the responding channel exists. An unknown channel is created while the device has not finished probing; once NCSI_DEV_PROBED is set an unknown channel yields -ENXIO. Returns 0 when the channel exists afterwards, -ENODEV when it could not be added. */ +static int ncsi_rsp_handler_cis(struct ncsi_request *nr) +{ + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_package *np; + struct ncsi_channel *nc; + unsigned char id; + + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, &np, &nc); + if (!nc) { + if (ndp->flags & NCSI_DEV_PROBED) + return -ENXIO; + /* NOTE(review): np is handed to ncsi_add_channel() without a NULL check - confirm the lookup above always sets it on this path */ + id = NCSI_CHANNEL_INDEX(rsp->rsp.common.channel); + nc = ncsi_add_channel(np, id); + } + + return nc ? 0 : -ENODEV; +} + /* Select Package response: make sure the responding package exists, adding it while the device is still being probed. */ +static int ncsi_rsp_handler_sp(struct ncsi_request *nr) +{ + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_package *np; + unsigned char id; + + /* Add the package if it's not existing. Otherwise, + * to change the state of its child channels.
+ */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + &np, NULL); + if (!np) { + /* Unknown packages are only added before probing has completed */ + if (ndp->flags & NCSI_DEV_PROBED) + return -ENXIO; + + id = NCSI_PACKAGE_INDEX(rsp->rsp.common.channel); + np = ncsi_add_package(ndp, id); + if (!np) + return -ENODEV; + } + + return 0; +} + /* Deselect Package response: every channel of the package is put back to NCSI_CHANNEL_INACTIVE under its own lock. Returns -ENODEV when the package is unknown. */ +static int ncsi_rsp_handler_dp(struct ncsi_request *nr) +{ + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_package *np; + struct ncsi_channel *nc; + unsigned long flags; + + /* Find the package */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + &np, NULL); + if (!np) + return -ENODEV; + + /* Change state of all channels attached to the package */ + NCSI_FOR_EACH_CHANNEL(np, nc) { + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); + } + + return 0; +} + /* Enable Channel response: record the channel as enabled in modes[NCSI_MODE_ENABLE]. Returns -ENODEV for an unknown channel and -EBUSY when it was already recorded as enabled. */ +static int ncsi_rsp_handler_ec(struct ncsi_request *nr) +{ + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + + /* Find the package and channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + ncm = &nc->modes[NCSI_MODE_ENABLE]; + if (ncm->enable) + return -EBUSY; + + ncm->enable = 1; + return 0; +} + /* Disable Channel response: after validating the response with a 4-byte payload, record the channel as disabled in modes[NCSI_MODE_ENABLE]. Returns -ENODEV for an unknown channel and -EBUSY when it was not recorded as enabled. */ +static int ncsi_rsp_handler_dc(struct ncsi_request *nr) +{ + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + int ret; + + ret = ncsi_validate_rsp_pkt(nr, 4); + if (ret) + return ret; + + /* Find the package and channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + ncm = &nc->modes[NCSI_MODE_ENABLE]; + if
(!ncm->enable) + return -EBUSY; + + ncm->enable = 0; + return 0; +} + +/* Reset Channel response: put the channel back to NCSI_CHANNEL_INACTIVE under its lock. Returns -ENODEV for an unknown channel. */ +static int ncsi_rsp_handler_rc(struct ncsi_request *nr) +{ + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + unsigned long flags; + + /* Find the package and channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Update state for the specified channel */ + spin_lock_irqsave(&nc->lock, flags); + nc->state = NCSI_CHANNEL_INACTIVE; + spin_unlock_irqrestore(&nc->lock, flags); + + return 0; +} + +/* Enable Channel Network Tx response: record Tx as enabled in modes[NCSI_MODE_TX_ENABLE]. Returns -ENODEV for an unknown channel and -EBUSY when Tx was already recorded as enabled. */ +static int ncsi_rsp_handler_ecnt(struct ncsi_request *nr) +{ + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + + /* Find the package and channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + ncm = &nc->modes[NCSI_MODE_TX_ENABLE]; + if (ncm->enable) + return -EBUSY; + + ncm->enable = 1; + return 0; +} + +/* Disable Channel Network Tx response: record Tx as disabled in modes[NCSI_MODE_TX_ENABLE]. Returns -ENODEV for an unknown channel and -EBUSY when Tx was not recorded as enabled. */ +static int ncsi_rsp_handler_dcnt(struct ncsi_request *nr) +{ + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + + /* Find the package and channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + ncm = &nc->modes[NCSI_MODE_TX_ENABLE]; + if (!ncm->enable) + return -EBUSY; + + /* Tx has been disabled, so the cached flag must be cleared; leaving it set would make a later enable response fail with -EBUSY */ + ncm->enable = 0; + return 0; +} + +/* AEN Enable response: record the AEN configuration that was sent in the matching command. */ +static int ncsi_rsp_handler_ae(struct ncsi_request *nr) +{ + struct ncsi_cmd_ae_pkt *cmd; + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + + /* Find the package and channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); +
ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Check if the AEN has been enabled */ + ncm = &nc->modes[NCSI_MODE_AEN]; + if (ncm->enable) + return -EBUSY; + + /* Update to AEN configuration, taken from the command that was sent (nr->cmd) */ + cmd = (struct ncsi_cmd_ae_pkt *)skb_network_header(nr->cmd); + ncm->enable = 1; + ncm->data[0] = cmd->mc_id; + ncm->data[1] = ntohl(cmd->mode); + + return 0; +} + /* Set Link response: cache the link mode and OEM link mode from the command that was sent in modes[NCSI_MODE_LINK].data[0..1]. Returns -ENODEV for an unknown channel. */ +static int ncsi_rsp_handler_sl(struct ncsi_request *nr) +{ + struct ncsi_cmd_sl_pkt *cmd; + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + + /* Find the package and channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + cmd = (struct ncsi_cmd_sl_pkt *)skb_network_header(nr->cmd); + ncm = &nc->modes[NCSI_MODE_LINK]; + ncm->data[0] = ntohl(cmd->mode); + ncm->data[1] = ntohl(cmd->oem_mode); + + return 0; +} + /* Get Link Status response: cache status, other indications and OEM status in modes[NCSI_MODE_LINK].data[2..4]. Unless the request is flagged as driven, the channel's timeout counter is also reset under the channel lock. Returns -ENODEV for an unknown channel. */ +static int ncsi_rsp_handler_gls(struct ncsi_request *nr) +{ + struct ncsi_rsp_gls_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + unsigned long flags; + + /* Find the package and channel */ + rsp = (struct ncsi_rsp_gls_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + ncm = &nc->modes[NCSI_MODE_LINK]; + ncm->data[2] = ntohl(rsp->status); + ncm->data[3] = ntohl(rsp->other); + ncm->data[4] = ntohl(rsp->oem_status); + + if (nr->driven) + return 0; + + /* Reset the channel monitor if it has been enabled */ + spin_lock_irqsave(&nc->lock, flags); + nc->timeout = 0; + spin_unlock_irqrestore(&nc->lock, flags); + + return 0; +} + /* Set VLAN Filter response: apply the add/remove that was requested in the matching command to the channel's VLAN filter table. */ +static int ncsi_rsp_handler_svf(struct ncsi_request *nr) +{ + struct ncsi_cmd_svf_pkt *cmd; + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct
ncsi_channel *nc; + struct ncsi_channel_filter *ncf; + unsigned short vlan; + int ret; + + /* Find the package and channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* The table must exist and the index sent in the command must be inside it */ + cmd = (struct ncsi_cmd_svf_pkt *)skb_network_header(nr->cmd); + ncf = nc->filters[NCSI_FILTER_VLAN]; + if (!ncf) + return -ENOENT; + if (cmd->index >= ncf->total) + return -ERANGE; + + /* Add or remove the VLAN filter: bit 0 of the enable byte selects add, otherwise the entry at the given index is removed */ + if (!(cmd->enable & 0x1)) { + ret = ncsi_remove_filter(nc, NCSI_FILTER_VLAN, cmd->index); + } else { + vlan = ntohs(cmd->vlan); + ret = ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan); + } + + return ret; +} + +/* Enable VLAN response: record VLAN mode as enabled in modes[NCSI_MODE_VLAN] together with the filter mode sent in the command. Returns -ENODEV for an unknown channel and -EBUSY when VLAN mode was already recorded as enabled. */ +static int ncsi_rsp_handler_ev(struct ncsi_request *nr) +{ + struct ncsi_cmd_ev_pkt *cmd; + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + + /* Find the package and channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Check if VLAN mode has been enabled */ + ncm = &nc->modes[NCSI_MODE_VLAN]; + if (ncm->enable) + return -EBUSY; + + /* Update to VLAN mode. The mode field of struct ncsi_cmd_ev_pkt is a single byte, so it is stored as is: passing it through ntohl() would move it into the top byte on little-endian hosts. */ + cmd = (struct ncsi_cmd_ev_pkt *)skb_network_header(nr->cmd); + ncm->enable = 1; + ncm->data[0] = cmd->mode; + + return 0; +} + +/* Disable VLAN response: record VLAN mode as disabled in modes[NCSI_MODE_VLAN]. Returns -ENODEV for an unknown channel and -EBUSY when VLAN mode was not recorded as enabled. */ +static int ncsi_rsp_handler_dv(struct ncsi_request *nr) +{ + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + + /* Find the package and channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Check if VLAN mode has been enabled */ + ncm = &nc->modes[NCSI_MODE_VLAN]; + if (!ncm->enable) + return -EBUSY; + + /* Update to VLAN mode */ + ncm->enable = 0; + return
0; +} + /* Set MAC Address response: apply the add/remove sent in the matching command to the channel's unicast or multicast filter table. The top three bits of at_e select the table (0 = UC, 1 = MC, anything else is -EINVAL) and bit 0 selects add (set) or remove (clear). Returns -ENODEV for an unknown channel, -ENOENT when the table does not exist, -ERANGE for an index outside the table and -EBUSY when the slot is already in the requested state. */ +static int ncsi_rsp_handler_sma(struct ncsi_request *nr) +{ + struct ncsi_cmd_sma_pkt *cmd; + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_filter *ncf; + void *bitmap; + + /* Find the package and channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* According to NCSI spec 1.01, the mixed filter table + * isn't supported yet. + */ + cmd = (struct ncsi_cmd_sma_pkt *)skb_network_header(nr->cmd); + switch (cmd->at_e >> 5) { + case 0x0: /* UC address */ + ncf = nc->filters[NCSI_FILTER_UC]; + break; + case 0x1: /* MC address */ + ncf = nc->filters[NCSI_FILTER_MC]; + break; + default: + return -EINVAL; + } + + /* Sanity check on the filter */ + if (!ncf) + return -ENOENT; + else if (cmd->index >= ncf->total) + return -ERANGE; + /* The bitmap tracks which slots are in use; each slot holds one 6-byte MAC address */ + bitmap = &ncf->bitmap; + if (cmd->at_e & 0x1) { + if (test_and_set_bit(cmd->index, bitmap)) + return -EBUSY; + memcpy(ncf->data + 6 * cmd->index, cmd->mac, 6); + } else { + if (!test_and_clear_bit(cmd->index, bitmap)) + return -EBUSY; + + memset(ncf->data + 6 * cmd->index, 0, 6); + } + + return 0; +} + /* Enable Broadcast Filter response: record the filter as enabled in modes[NCSI_MODE_BC] together with the mode sent in the command. Returns -ENODEV for an unknown channel and -EBUSY when the filter was already recorded as enabled. */ +static int ncsi_rsp_handler_ebf(struct ncsi_request *nr) +{ + struct ncsi_cmd_ebf_pkt *cmd; + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + + /* Find the package and channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, NULL, &nc); + if (!nc) + return -ENODEV; + + /* Check if broadcast filter has been enabled */ + ncm = &nc->modes[NCSI_MODE_BC]; + if (ncm->enable) + return -EBUSY; + + /* Update to broadcast filter mode */ + cmd = (struct ncsi_cmd_ebf_pkt *)skb_network_header(nr->cmd); + ncm->enable = 1; + ncm->data[0] = ntohl(cmd->mode); + + return 0; +} + /* Disable Broadcast Filter response handler */ +static int
ncsi_rsp_handler_dbf(struct ncsi_request *nr) +{ + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + /* Find the channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Check if broadcast filter isn't enabled */ + ncm = &nc->modes[NCSI_MODE_BC]; + if (!ncm->enable) + return -EBUSY; + + /* Update to broadcast filter mode: mark it disabled and forget the cached mode */ + ncm->enable = 0; + ncm->data[0] = 0; + + return 0; +} + /* Enable Global Multicast Filter response: record the filter as enabled in modes[NCSI_MODE_MC] together with the mode sent in the command. Returns -ENODEV for an unknown channel and -EBUSY when the filter was already recorded as enabled. */ +static int ncsi_rsp_handler_egmf(struct ncsi_request *nr) +{ + struct ncsi_cmd_egmf_pkt *cmd; + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + + /* Find the channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Check if multicast filter has been enabled */ + ncm = &nc->modes[NCSI_MODE_MC]; + if (ncm->enable) + return -EBUSY; + + /* Update to multicast filter mode */ + cmd = (struct ncsi_cmd_egmf_pkt *)skb_network_header(nr->cmd); + ncm->enable = 1; + ncm->data[0] = ntohl(cmd->mode); + + return 0; +} + /* Disable Global Multicast Filter response: record the filter as disabled in modes[NCSI_MODE_MC] and clear the cached mode. Returns -ENODEV for an unknown channel and -EBUSY when the filter was not recorded as enabled. */ +static int ncsi_rsp_handler_dgmf(struct ncsi_request *nr) +{ + struct ncsi_rsp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + /* Find the channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Check if multicast filter has been enabled */ + ncm = &nc->modes[NCSI_MODE_MC]; + if (!ncm->enable) + return -EBUSY; + + /* Update to multicast filter mode */ + ncm->enable = 0; + ncm->data[0] = 0; + + return 0; +} + /* Set NCSI Flow Control response handler */ +static int ncsi_rsp_handler_snfc(struct ncsi_request *nr) +{ + struct ncsi_cmd_snfc_pkt *cmd; + struct ncsi_rsp_pkt *rsp; + struct
ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_mode *ncm; + + /* Find the channel */ + rsp = (struct ncsi_rsp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Check if flow control has been enabled */ + ncm = &nc->modes[NCSI_MODE_FC]; + if (ncm->enable) + return -EBUSY; + + /* Update to flow control mode */ + cmd = (struct ncsi_cmd_snfc_pkt *)skb_network_header(nr->cmd); + ncm->enable = 1; + ncm->data[0] = cmd->mode; + + return 0; +} + +static int ncsi_rsp_handler_gvi(struct ncsi_request *nr) +{ + struct ncsi_rsp_gvi_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_version *ncv; + int i; + + /* Find the channel */ + rsp = (struct ncsi_rsp_gvi_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Update to channel's version info */ + ncv = &nc->version; + ncv->version = ntohl(rsp->ncsi_version); + ncv->alpha2 = rsp->alpha2; + memcpy(ncv->fw_name, rsp->fw_name, 12); + ncv->fw_version = ntohl(rsp->fw_version); + for (i = 0; i < ARRAY_SIZE(ncv->pci_ids); i++) + ncv->pci_ids[i] = ntohs(rsp->pci_ids[i]); + ncv->mf_id = ntohl(rsp->mf_id); + + return 0; +} + +static int ncsi_rsp_handler_gc(struct ncsi_request *nr) +{ + struct ncsi_rsp_gc_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_filter *ncf; + size_t size, entry_size; + int cnt, i; + + /* Find the channel */ + rsp = (struct ncsi_rsp_gc_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Update channel's capabilities */ + nc->caps[NCSI_CAP_GENERIC].cap = ntohl(rsp->cap) & + NCSI_CAP_GENERIC_MASK; + nc->caps[NCSI_CAP_BC].cap = ntohl(rsp->bc_cap) & + NCSI_CAP_BC_MASK; + nc->caps[NCSI_CAP_MC].cap = 
ntohl(rsp->mc_cap) & + NCSI_CAP_MC_MASK; + nc->caps[NCSI_CAP_BUFFER].cap = ntohl(rsp->buf_cap); + nc->caps[NCSI_CAP_AEN].cap = ntohl(rsp->aen_cap) & + NCSI_CAP_AEN_MASK; + nc->caps[NCSI_CAP_VLAN].cap = rsp->vlan_mode & + NCSI_CAP_VLAN_MASK; + + /* Build filters */ + for (i = 0; i < NCSI_FILTER_MAX; i++) { + switch (i) { + case NCSI_FILTER_VLAN: + cnt = rsp->vlan_cnt; + entry_size = 2; + break; + case NCSI_FILTER_MIXED: + cnt = rsp->mixed_cnt; + entry_size = 6; + break; + case NCSI_FILTER_MC: + cnt = rsp->mc_cnt; + entry_size = 6; + break; + case NCSI_FILTER_UC: + cnt = rsp->uc_cnt; + entry_size = 6; + break; + default: + continue; + } + + if (!cnt || nc->filters[i]) + continue; + + size = sizeof(*ncf) + cnt * entry_size; + ncf = kzalloc(size, GFP_ATOMIC); + if (!ncf) { + pr_warn("%s: Cannot alloc filter table (%d)\n", + __func__, i); + return -ENOMEM; + } + + ncf->index = i; + ncf->total = cnt; + ncf->bitmap = 0x0ul; + nc->filters[i] = ncf; + } + + return 0; +} + +static int ncsi_rsp_handler_gp(struct ncsi_request *nr) +{ + struct ncsi_rsp_gp_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + unsigned short enable, vlan; + unsigned char *pdata; + int table, i; + + /* Find the channel */ + rsp = (struct ncsi_rsp_gp_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Modes with explicit enabled indications */ + if (ntohl(rsp->valid_modes) & 0x1) { /* BC filter mode */ + nc->modes[NCSI_MODE_BC].enable = 1; + nc->modes[NCSI_MODE_BC].data[0] = ntohl(rsp->bc_mode); + } + if (ntohl(rsp->valid_modes) & 0x2) /* Channel enabled */ + nc->modes[NCSI_MODE_ENABLE].enable = 1; + if (ntohl(rsp->valid_modes) & 0x4) /* Channel Tx enabled */ + nc->modes[NCSI_MODE_TX_ENABLE].enable = 1; + if (ntohl(rsp->valid_modes) & 0x8) /* MC filter mode */ + nc->modes[NCSI_MODE_MC].enable = 1; + + /* Modes without explicit enabled indications */ + 
nc->modes[NCSI_MODE_LINK].enable = 1; + nc->modes[NCSI_MODE_LINK].data[0] = ntohl(rsp->link_mode); + nc->modes[NCSI_MODE_VLAN].enable = 1; + nc->modes[NCSI_MODE_VLAN].data[0] = rsp->vlan_mode; + nc->modes[NCSI_MODE_FC].enable = 1; + nc->modes[NCSI_MODE_FC].data[0] = rsp->fc_mode; + nc->modes[NCSI_MODE_AEN].enable = 1; + nc->modes[NCSI_MODE_AEN].data[0] = ntohl(rsp->aen_mode); + + /* MAC addresses filter table */ + pdata = (unsigned char *)rsp + 48; + enable = rsp->mac_enable; + for (i = 0; i < rsp->mac_cnt; i++, pdata += 6) { + if (i >= (nc->filters[NCSI_FILTER_UC]->total + + nc->filters[NCSI_FILTER_MC]->total)) + table = NCSI_FILTER_MIXED; + else if (i >= nc->filters[NCSI_FILTER_UC]->total) + table = NCSI_FILTER_MC; + else + table = NCSI_FILTER_UC; + + if (!(enable & (0x1 << i))) + continue; + + if (ncsi_find_filter(nc, table, pdata) >= 0) + continue; + + ncsi_add_filter(nc, table, pdata); + } + + /* VLAN filter table */ + enable = ntohs(rsp->vlan_enable); + for (i = 0; i < rsp->vlan_cnt; i++, pdata += 2) { + if (!(enable & (0x1 << i))) + continue; + + vlan = ntohs(*(__be16 *)pdata); + if (ncsi_find_filter(nc, NCSI_FILTER_VLAN, &vlan) >= 0) + continue; + + ncsi_add_filter(nc, NCSI_FILTER_VLAN, &vlan); + } + + return 0; +} + +static int ncsi_rsp_handler_gcps(struct ncsi_request *nr) +{ + struct ncsi_rsp_gcps_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_stats *ncs; + + /* Find the channel */ + rsp = (struct ncsi_rsp_gcps_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Update HNC's statistics */ + ncs = &nc->stats; + ncs->hnc_cnt_hi = ntohl(rsp->cnt_hi); + ncs->hnc_cnt_lo = ntohl(rsp->cnt_lo); + ncs->hnc_rx_bytes = ntohl(rsp->rx_bytes); + ncs->hnc_tx_bytes = ntohl(rsp->tx_bytes); + ncs->hnc_rx_uc_pkts = ntohl(rsp->rx_uc_pkts); + ncs->hnc_rx_mc_pkts = ntohl(rsp->rx_mc_pkts); + ncs->hnc_rx_bc_pkts = 
ntohl(rsp->rx_bc_pkts); + ncs->hnc_tx_uc_pkts = ntohl(rsp->tx_uc_pkts); + ncs->hnc_tx_mc_pkts = ntohl(rsp->tx_mc_pkts); + ncs->hnc_tx_bc_pkts = ntohl(rsp->tx_bc_pkts); + ncs->hnc_fcs_err = ntohl(rsp->fcs_err); + ncs->hnc_align_err = ntohl(rsp->align_err); + ncs->hnc_false_carrier = ntohl(rsp->false_carrier); + ncs->hnc_runt_pkts = ntohl(rsp->runt_pkts); + ncs->hnc_jabber_pkts = ntohl(rsp->jabber_pkts); + ncs->hnc_rx_pause_xon = ntohl(rsp->rx_pause_xon); + ncs->hnc_rx_pause_xoff = ntohl(rsp->rx_pause_xoff); + ncs->hnc_tx_pause_xon = ntohl(rsp->tx_pause_xon); + ncs->hnc_tx_pause_xoff = ntohl(rsp->tx_pause_xoff); + ncs->hnc_tx_s_collision = ntohl(rsp->tx_s_collision); + ncs->hnc_tx_m_collision = ntohl(rsp->tx_m_collision); + ncs->hnc_l_collision = ntohl(rsp->l_collision); + ncs->hnc_e_collision = ntohl(rsp->e_collision); + ncs->hnc_rx_ctl_frames = ntohl(rsp->rx_ctl_frames); + ncs->hnc_rx_64_frames = ntohl(rsp->rx_64_frames); + ncs->hnc_rx_127_frames = ntohl(rsp->rx_127_frames); + ncs->hnc_rx_255_frames = ntohl(rsp->rx_255_frames); + ncs->hnc_rx_511_frames = ntohl(rsp->rx_511_frames); + ncs->hnc_rx_1023_frames = ntohl(rsp->rx_1023_frames); + ncs->hnc_rx_1522_frames = ntohl(rsp->rx_1522_frames); + ncs->hnc_rx_9022_frames = ntohl(rsp->rx_9022_frames); + ncs->hnc_tx_64_frames = ntohl(rsp->tx_64_frames); + ncs->hnc_tx_127_frames = ntohl(rsp->tx_127_frames); + ncs->hnc_tx_255_frames = ntohl(rsp->tx_255_frames); + ncs->hnc_tx_511_frames = ntohl(rsp->tx_511_frames); + ncs->hnc_tx_1023_frames = ntohl(rsp->tx_1023_frames); + ncs->hnc_tx_1522_frames = ntohl(rsp->tx_1522_frames); + ncs->hnc_tx_9022_frames = ntohl(rsp->tx_9022_frames); + ncs->hnc_rx_valid_bytes = ntohl(rsp->rx_valid_bytes); + ncs->hnc_rx_runt_pkts = ntohl(rsp->rx_runt_pkts); + ncs->hnc_rx_jabber_pkts = ntohl(rsp->rx_jabber_pkts); + + return 0; +} + +static int ncsi_rsp_handler_gns(struct ncsi_request *nr) +{ + struct ncsi_rsp_gns_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct 
ncsi_channel_stats *ncs; + + /* Find the channel */ + rsp = (struct ncsi_rsp_gns_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Update HNC's statistics */ + ncs = &nc->stats; + ncs->ncsi_rx_cmds = ntohl(rsp->rx_cmds); + ncs->ncsi_dropped_cmds = ntohl(rsp->dropped_cmds); + ncs->ncsi_cmd_type_errs = ntohl(rsp->cmd_type_errs); + ncs->ncsi_cmd_csum_errs = ntohl(rsp->cmd_csum_errs); + ncs->ncsi_rx_pkts = ntohl(rsp->rx_pkts); + ncs->ncsi_tx_pkts = ntohl(rsp->tx_pkts); + ncs->ncsi_tx_aen_pkts = ntohl(rsp->tx_aen_pkts); + + return 0; +} + +static int ncsi_rsp_handler_gnpts(struct ncsi_request *nr) +{ + struct ncsi_rsp_gnpts_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_channel *nc; + struct ncsi_channel_stats *ncs; + + /* Find the channel */ + rsp = (struct ncsi_rsp_gnpts_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + NULL, &nc); + if (!nc) + return -ENODEV; + + /* Update HNC's statistics */ + ncs = &nc->stats; + ncs->pt_tx_pkts = ntohl(rsp->tx_pkts); + ncs->pt_tx_dropped = ntohl(rsp->tx_dropped); + ncs->pt_tx_channel_err = ntohl(rsp->tx_channel_err); + ncs->pt_tx_us_err = ntohl(rsp->tx_us_err); + ncs->pt_rx_pkts = ntohl(rsp->rx_pkts); + ncs->pt_rx_dropped = ntohl(rsp->rx_dropped); + ncs->pt_rx_channel_err = ntohl(rsp->rx_channel_err); + ncs->pt_rx_us_err = ntohl(rsp->rx_us_err); + ncs->pt_rx_os_err = ntohl(rsp->rx_os_err); + + return 0; +} + +static int ncsi_rsp_handler_gps(struct ncsi_request *nr) +{ + struct ncsi_rsp_gps_pkt *rsp; + struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_package *np; + + /* Find the package */ + rsp = (struct ncsi_rsp_gps_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + &np, NULL); + if (!np) + return -ENODEV; + + return 0; +} + +static int ncsi_rsp_handler_gpuuid(struct ncsi_request *nr) +{ + struct ncsi_rsp_gpuuid_pkt *rsp; 
+ struct ncsi_dev_priv *ndp = nr->ndp; + struct ncsi_package *np; + + /* Find the package */ + rsp = (struct ncsi_rsp_gpuuid_pkt *)skb_network_header(nr->rsp); + ncsi_find_package_and_channel(ndp, rsp->rsp.common.channel, + &np, NULL); + if (!np) + return -ENODEV; + + memcpy(np->uuid, rsp->uuid, sizeof(rsp->uuid)); + + return 0; +} + +static struct ncsi_rsp_handler { + unsigned char type; + int payload; + int (*handler)(struct ncsi_request *nr); +} ncsi_rsp_handlers[] = { + { NCSI_PKT_RSP_CIS, 4, ncsi_rsp_handler_cis }, + { NCSI_PKT_RSP_SP, 4, ncsi_rsp_handler_sp }, + { NCSI_PKT_RSP_DP, 4, ncsi_rsp_handler_dp }, + { NCSI_PKT_RSP_EC, 4, ncsi_rsp_handler_ec }, + { NCSI_PKT_RSP_DC, 4, ncsi_rsp_handler_dc }, + { NCSI_PKT_RSP_RC, 4, ncsi_rsp_handler_rc }, + { NCSI_PKT_RSP_ECNT, 4, ncsi_rsp_handler_ecnt }, + { NCSI_PKT_RSP_DCNT, 4, ncsi_rsp_handler_dcnt }, + { NCSI_PKT_RSP_AE, 4, ncsi_rsp_handler_ae }, + { NCSI_PKT_RSP_SL, 4, ncsi_rsp_handler_sl }, + { NCSI_PKT_RSP_GLS, 16, ncsi_rsp_handler_gls }, + { NCSI_PKT_RSP_SVF, 4, ncsi_rsp_handler_svf }, + { NCSI_PKT_RSP_EV, 4, ncsi_rsp_handler_ev }, + { NCSI_PKT_RSP_DV, 4, ncsi_rsp_handler_dv }, + { NCSI_PKT_RSP_SMA, 4, ncsi_rsp_handler_sma }, + { NCSI_PKT_RSP_EBF, 4, ncsi_rsp_handler_ebf }, + { NCSI_PKT_RSP_DBF, 4, ncsi_rsp_handler_dbf }, + { NCSI_PKT_RSP_EGMF, 4, ncsi_rsp_handler_egmf }, + { NCSI_PKT_RSP_DGMF, 4, ncsi_rsp_handler_dgmf }, + { NCSI_PKT_RSP_SNFC, 4, ncsi_rsp_handler_snfc }, + { NCSI_PKT_RSP_GVI, 36, ncsi_rsp_handler_gvi }, + { NCSI_PKT_RSP_GC, 32, ncsi_rsp_handler_gc }, + { NCSI_PKT_RSP_GP, -1, ncsi_rsp_handler_gp }, + { NCSI_PKT_RSP_GCPS, 172, ncsi_rsp_handler_gcps }, + { NCSI_PKT_RSP_GNS, 172, ncsi_rsp_handler_gns }, + { NCSI_PKT_RSP_GNPTS, 172, ncsi_rsp_handler_gnpts }, + { NCSI_PKT_RSP_GPS, 8, ncsi_rsp_handler_gps }, + { NCSI_PKT_RSP_OEM, 0, NULL }, + { NCSI_PKT_RSP_PLDM, 0, NULL }, + { NCSI_PKT_RSP_GPUUID, 20, ncsi_rsp_handler_gpuuid } +}; + +int ncsi_rcv_rsp(struct sk_buff *skb, struct net_device *dev, + 
struct packet_type *pt, struct net_device *orig_dev) +{ + struct ncsi_rsp_handler *nrh = NULL; + struct ncsi_dev *nd; + struct ncsi_dev_priv *ndp; + struct ncsi_request *nr; + struct ncsi_pkt_hdr *hdr; + unsigned long flags; + int payload, i, ret; + + /* Find the NCSI device */ + nd = ncsi_find_dev(dev); + ndp = nd ? TO_NCSI_DEV_PRIV(nd) : NULL; + if (!ndp) + return -ENODEV; + + /* Check if it is AEN packet */ + hdr = (struct ncsi_pkt_hdr *)skb_network_header(skb); + if (hdr->type == NCSI_PKT_AEN) + return ncsi_aen_handler(ndp, skb); + + /* Find the handler */ + for (i = 0; i < ARRAY_SIZE(ncsi_rsp_handlers); i++) { + if (ncsi_rsp_handlers[i].type == hdr->type) { + if (ncsi_rsp_handlers[i].handler) + nrh = &ncsi_rsp_handlers[i]; + else + nrh = NULL; + + break; + } + } + + if (!nrh) { + netdev_err(nd->dev, "Received unrecognized packet (0x%x)\n", + hdr->type); + return -ENOENT; + } + + /* Associate with the request */ + spin_lock_irqsave(&ndp->lock, flags); + nr = &ndp->requests[hdr->id]; + if (!nr->used) { + spin_unlock_irqrestore(&ndp->lock, flags); + return -ENODEV; + } + + nr->rsp = skb; + if (!nr->enabled) { + spin_unlock_irqrestore(&ndp->lock, flags); + ret = -ENOENT; + goto out; + } + + /* Validate the packet */ + spin_unlock_irqrestore(&ndp->lock, flags); + payload = nrh->payload; + if (payload < 0) + payload = ntohs(hdr->length); + ret = ncsi_validate_rsp_pkt(nr, payload); + if (ret) + goto out; + + /* Process the packet */ + ret = nrh->handler(nr); +out: + ncsi_free_request(nr); + return ret; +} diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index a98b780..d2d2b35 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -21,6 +21,8 @@ hostprogs-y += spintest hostprogs-y += map_perf_test hostprogs-y += test_overhead hostprogs-y += test_cgrp2_array_pin +hostprogs-y += xdp1 +hostprogs-y += xdp2 test_verifier-objs := test_verifier.o libbpf.o test_maps-objs := test_maps.o libbpf.o @@ -42,6 +44,9 @@ spintest-objs := bpf_load.o libbpf.o 
spintest_user.o map_perf_test-objs := bpf_load.o libbpf.o map_perf_test_user.o test_overhead-objs := bpf_load.o libbpf.o test_overhead_user.o test_cgrp2_array_pin-objs := libbpf.o test_cgrp2_array_pin.o +xdp1-objs := bpf_load.o libbpf.o xdp1_user.o +# reuse xdp1 source intentionally +xdp2-objs := bpf_load.o libbpf.o xdp1_user.o # Tell kbuild to always build the programs always := $(hostprogs-y) @@ -64,6 +69,8 @@ always += test_overhead_tp_kern.o always += test_overhead_kprobe_kern.o always += parse_varlen.o parse_simple.o parse_ldabs.o always += test_cgrp2_tc_kern.o +always += xdp1_kern.o +always += xdp2_kern.o HOSTCFLAGS += -I$(objtree)/usr/include @@ -84,6 +91,8 @@ HOSTLOADLIBES_offwaketime += -lelf HOSTLOADLIBES_spintest += -lelf HOSTLOADLIBES_map_perf_test += -lelf -lrt HOSTLOADLIBES_test_overhead += -lelf -lrt +HOSTLOADLIBES_xdp1 += -lelf +HOSTLOADLIBES_xdp2 += -lelf # Allows pointing LLC/CLANG to a LLVM backend with bpf support, redefine on cmdline: # make samples/bpf/ LLC=~/git/llvm/build/bin/llc CLANG=~/git/llvm/build/bin/clang diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c index 022af71..0cfda23 100644 --- a/samples/bpf/bpf_load.c +++ b/samples/bpf/bpf_load.c @@ -50,6 +50,7 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) bool is_kprobe = strncmp(event, "kprobe/", 7) == 0; bool is_kretprobe = strncmp(event, "kretprobe/", 10) == 0; bool is_tracepoint = strncmp(event, "tracepoint/", 11) == 0; + bool is_xdp = strncmp(event, "xdp", 3) == 0; enum bpf_prog_type prog_type; char buf[256]; int fd, efd, err, id; @@ -66,6 +67,8 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) prog_type = BPF_PROG_TYPE_KPROBE; } else if (is_tracepoint) { prog_type = BPF_PROG_TYPE_TRACEPOINT; + } else if (is_xdp) { + prog_type = BPF_PROG_TYPE_XDP; } else { printf("Unknown event '%s'\n", event); return -1; @@ -79,6 +82,9 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size) 
prog_fd[prog_cnt++] = fd; + if (is_xdp) + return 0; + if (is_socket) { event += 6; if (*event != '/') @@ -319,6 +325,7 @@ int load_bpf_file(char *path) if (memcmp(shname_prog, "kprobe/", 7) == 0 || memcmp(shname_prog, "kretprobe/", 10) == 0 || memcmp(shname_prog, "tracepoint/", 11) == 0 || + memcmp(shname_prog, "xdp", 3) == 0 || memcmp(shname_prog, "socket", 6) == 0) load_and_attach(shname_prog, insns, data_prog->d_size); } @@ -336,6 +343,7 @@ int load_bpf_file(char *path) if (memcmp(shname, "kprobe/", 7) == 0 || memcmp(shname, "kretprobe/", 10) == 0 || memcmp(shname, "tracepoint/", 11) == 0 || + memcmp(shname, "xdp", 3) == 0 || memcmp(shname, "socket", 6) == 0) load_and_attach(shname, data->d_buf, data->d_size); } diff --git a/samples/bpf/xdp1_kern.c b/samples/bpf/xdp1_kern.c new file mode 100644 index 0000000..e7dd8ac --- /dev/null +++ b/samples/bpf/xdp1_kern.c @@ -0,0 +1,93 @@ +/* Copyright (c) 2016 PLUMgrid + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. 
+ */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") dropcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 256, +}; + +static int parse_ipv4(void *data, u64 nh_off, void *data_end) +{ + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return 0; + return iph->protocol; +} + +static int parse_ipv6(void *data, u64 nh_off, void *data_end) +{ + struct ipv6hdr *ip6h = data + nh_off; + + if (ip6h + 1 > data_end) + return 0; + return ip6h->nexthdr; +} + +SEC("xdp1") +int xdp_prog1(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int rc = XDP_DROP; + long *value; + u16 h_proto; + u64 nh_off; + u32 index; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + h_proto = eth->h_proto; + + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + + if (h_proto == htons(ETH_P_IP)) + index = parse_ipv4(data, nh_off, data_end); + else if (h_proto == htons(ETH_P_IPV6)) + index = parse_ipv6(data, nh_off, data_end); + else + index = 0; + + value = bpf_map_lookup_elem(&dropcnt, &index); + if (value) + *value += 1; + + return rc; +} + +char _license[] SEC("license") = "GPL"; diff --git a/samples/bpf/xdp1_user.c 
b/samples/bpf/xdp1_user.c new file mode 100644 index 0000000..a5e109e --- /dev/null +++ b/samples/bpf/xdp1_user.c @@ -0,0 +1,181 @@ +/* Copyright (c) 2016 PLUMgrid + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#include <linux/bpf.h> +#include <linux/netlink.h> +#include <linux/rtnetlink.h> +#include <assert.h> +#include <errno.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/socket.h> +#include <unistd.h> +#include "bpf_load.h" +#include "libbpf.h" + +static int set_link_xdp_fd(int ifindex, int fd) +{ + struct sockaddr_nl sa; + int sock, seq = 0, len, ret = -1; + char buf[4096]; + struct nlattr *nla, *nla_xdp; + struct { + struct nlmsghdr nh; + struct ifinfomsg ifinfo; + char attrbuf[64]; + } req; + struct nlmsghdr *nh; + struct nlmsgerr *err; + + memset(&sa, 0, sizeof(sa)); + sa.nl_family = AF_NETLINK; + + sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); + if (sock < 0) { + printf("open netlink socket: %s\n", strerror(errno)); + return -1; + } + + if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { + printf("bind to netlink: %s\n", strerror(errno)); + goto cleanup; + } + + memset(&req, 0, sizeof(req)); + req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(struct ifinfomsg)); + req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK; + req.nh.nlmsg_type = RTM_SETLINK; + req.nh.nlmsg_pid = 0; + req.nh.nlmsg_seq = ++seq; + req.ifinfo.ifi_family = AF_UNSPEC; + req.ifinfo.ifi_index = ifindex; + nla = (struct nlattr *)(((char *)&req) + + NLMSG_ALIGN(req.nh.nlmsg_len)); + nla->nla_type = NLA_F_NESTED | 43/*IFLA_XDP*/; + + nla_xdp = (struct nlattr *)((char *)nla + NLA_HDRLEN); + nla_xdp->nla_type = 1/*IFLA_XDP_FD*/; + nla_xdp->nla_len = NLA_HDRLEN + sizeof(int); + memcpy((char *)nla_xdp + NLA_HDRLEN, &fd, sizeof(fd)); + nla->nla_len = NLA_HDRLEN + nla_xdp->nla_len; + + 
req.nh.nlmsg_len += NLA_ALIGN(nla->nla_len); + + if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) { + printf("send to netlink: %s\n", strerror(errno)); + goto cleanup; + } + + len = recv(sock, buf, sizeof(buf), 0); + if (len < 0) { + printf("recv from netlink: %s\n", strerror(errno)); + goto cleanup; + } + + for (nh = (struct nlmsghdr *)buf; NLMSG_OK(nh, len); + nh = NLMSG_NEXT(nh, len)) { + if (nh->nlmsg_pid != getpid()) { + printf("Wrong pid %d, expected %d\n", + nh->nlmsg_pid, getpid()); + goto cleanup; + } + if (nh->nlmsg_seq != seq) { + printf("Wrong seq %d, expected %d\n", + nh->nlmsg_seq, seq); + goto cleanup; + } + switch (nh->nlmsg_type) { + case NLMSG_ERROR: + err = (struct nlmsgerr *)NLMSG_DATA(nh); + if (!err->error) + continue; + printf("nlmsg error %s\n", strerror(-err->error)); + goto cleanup; + case NLMSG_DONE: + break; + } + } + + ret = 0; + +cleanup: + close(sock); + return ret; +} + +static int ifindex; + +static void int_exit(int sig) +{ + set_link_xdp_fd(ifindex, -1); + exit(0); +} + +/* simple per-protocol drop counter + */ +static void poll_stats(int interval) +{ + unsigned int nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + const unsigned int nr_keys = 256; + __u64 values[nr_cpus], prev[nr_keys][nr_cpus]; + __u32 key; + int i; + + memset(prev, 0, sizeof(prev)); + + while (1) { + sleep(interval); + + for (key = 0; key < nr_keys; key++) { + __u64 sum = 0; + + assert(bpf_lookup_elem(map_fd[0], &key, values) == 0); + for (i = 0; i < nr_cpus; i++) + sum += (values[i] - prev[key][i]); + if (sum) + printf("proto %u: %10llu pkt/s\n", + key, sum / interval); + memcpy(prev[key], values, sizeof(values)); + } + } +} + +int main(int ac, char **argv) +{ + char filename[256]; + + snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]); + + if (ac != 2) { + printf("usage: %s IFINDEX\n", argv[0]); + return 1; + } + + ifindex = strtoul(argv[1], NULL, 0); + + if (load_bpf_file(filename)) { + printf("%s", bpf_log_buf); + return 1; + } + + if (!prog_fd[0]) { + 
printf("load_bpf_file: %s\n", strerror(errno)); + return 1; + } + + signal(SIGINT, int_exit); + + if (set_link_xdp_fd(ifindex, prog_fd[0]) < 0) { + printf("link set xdp fd failed\n"); + return 1; + } + + poll_stats(2); + + return 0; +} diff --git a/samples/bpf/xdp2_kern.c b/samples/bpf/xdp2_kern.c new file mode 100644 index 0000000..38fe7e1 --- /dev/null +++ b/samples/bpf/xdp2_kern.c @@ -0,0 +1,114 @@ +/* Copyright (c) 2016 PLUMgrid + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#define KBUILD_MODNAME "foo" +#include <uapi/linux/bpf.h> +#include <linux/in.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_vlan.h> +#include <linux/ip.h> +#include <linux/ipv6.h> +#include "bpf_helpers.h" + +struct bpf_map_def SEC("maps") dropcnt = { + .type = BPF_MAP_TYPE_PERCPU_ARRAY, + .key_size = sizeof(u32), + .value_size = sizeof(long), + .max_entries = 256, +}; + +static void swap_src_dst_mac(void *data) +{ + unsigned short *p = data; + unsigned short dst[3]; + + dst[0] = p[0]; + dst[1] = p[1]; + dst[2] = p[2]; + p[0] = p[3]; + p[1] = p[4]; + p[2] = p[5]; + p[3] = dst[0]; + p[4] = dst[1]; + p[5] = dst[2]; +} + +static int parse_ipv4(void *data, u64 nh_off, void *data_end) +{ + struct iphdr *iph = data + nh_off; + + if (iph + 1 > data_end) + return 0; + return iph->protocol; +} + +static int parse_ipv6(void *data, u64 nh_off, void *data_end) +{ + struct ipv6hdr *ip6h = data + nh_off; + + if (ip6h + 1 > data_end) + return 0; + return ip6h->nexthdr; +} + +SEC("xdp1") +int xdp_prog1(struct xdp_md *ctx) +{ + void *data_end = (void *)(long)ctx->data_end; + void *data = (void *)(long)ctx->data; + struct ethhdr *eth = data; + int rc = XDP_DROP; + long *value; + u16 h_proto; + u64 nh_off; + u32 index; + + nh_off = sizeof(*eth); + if (data + nh_off > data_end) + return rc; + + h_proto = eth->h_proto; + + 
if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + if (h_proto == htons(ETH_P_8021Q) || h_proto == htons(ETH_P_8021AD)) { + struct vlan_hdr *vhdr; + + vhdr = data + nh_off; + nh_off += sizeof(struct vlan_hdr); + if (data + nh_off > data_end) + return rc; + h_proto = vhdr->h_vlan_encapsulated_proto; + } + + if (h_proto == htons(ETH_P_IP)) + index = parse_ipv4(data, nh_off, data_end); + else if (h_proto == htons(ETH_P_IPV6)) + index = parse_ipv6(data, nh_off, data_end); + else + index = 0; + + value = bpf_map_lookup_elem(&dropcnt, &index); + if (value) + *value += 1; + + if (index == 17) { + swap_src_dst_mac(data); + rc = XDP_TX; + } + + return rc; +} + +char _license[] SEC("license") = "GPL"; |