diff options
author | Matthew Heon <matthew.heon@gmail.com> | 2017-11-01 11:24:59 -0400 |
---|---|---|
committer | Matthew Heon <matthew.heon@gmail.com> | 2017-11-01 11:24:59 -0400 |
commit | a031b83a09a8628435317a03f199cdc18b78262f (patch) | |
tree | bc017a96769ce6de33745b8b0b1304ccf38e9df0 /vendor/github.com/vishvananda/netlink/qdisc_linux.go | |
parent | 2b74391cd5281f6fdf391ff8ad50fd1490f6bf89 (diff) | |
download | podman-a031b83a09a8628435317a03f199cdc18b78262f.tar.gz podman-a031b83a09a8628435317a03f199cdc18b78262f.tar.bz2 podman-a031b83a09a8628435317a03f199cdc18b78262f.zip |
Initial checkin from CRI-O repo
Signed-off-by: Matthew Heon <matthew.heon@gmail.com>
Diffstat (limited to 'vendor/github.com/vishvananda/netlink/qdisc_linux.go')
-rw-r--r-- | vendor/github.com/vishvananda/netlink/qdisc_linux.go | 527 |
1 files changed, 527 insertions, 0 deletions
diff --git a/vendor/github.com/vishvananda/netlink/qdisc_linux.go b/vendor/github.com/vishvananda/netlink/qdisc_linux.go new file mode 100644 index 000000000..2c0deddb3 --- /dev/null +++ b/vendor/github.com/vishvananda/netlink/qdisc_linux.go @@ -0,0 +1,527 @@ +package netlink + +import ( + "fmt" + "io/ioutil" + "strconv" + "strings" + "syscall" + + "github.com/vishvananda/netlink/nl" +) + +// NOTE function is here because it uses other linux functions +func NewNetem(attrs QdiscAttrs, nattrs NetemQdiscAttrs) *Netem { + var limit uint32 = 1000 + var lossCorr, delayCorr, duplicateCorr uint32 + var reorderProb, reorderCorr uint32 + var corruptProb, corruptCorr uint32 + + latency := nattrs.Latency + loss := Percentage2u32(nattrs.Loss) + gap := nattrs.Gap + duplicate := Percentage2u32(nattrs.Duplicate) + jitter := nattrs.Jitter + + // Correlation + if latency > 0 && jitter > 0 { + delayCorr = Percentage2u32(nattrs.DelayCorr) + } + if loss > 0 { + lossCorr = Percentage2u32(nattrs.LossCorr) + } + if duplicate > 0 { + duplicateCorr = Percentage2u32(nattrs.DuplicateCorr) + } + // FIXME should validate values(like loss/duplicate are percentages...) + latency = time2Tick(latency) + + if nattrs.Limit != 0 { + limit = nattrs.Limit + } + // Jitter is only value if latency is > 0 + if latency > 0 { + jitter = time2Tick(jitter) + } + + reorderProb = Percentage2u32(nattrs.ReorderProb) + reorderCorr = Percentage2u32(nattrs.ReorderCorr) + + if reorderProb > 0 { + // ERROR if lantency == 0 + if gap == 0 { + gap = 1 + } + } + + corruptProb = Percentage2u32(nattrs.CorruptProb) + corruptCorr = Percentage2u32(nattrs.CorruptCorr) + + return &Netem{ + QdiscAttrs: attrs, + Latency: latency, + DelayCorr: delayCorr, + Limit: limit, + Loss: loss, + LossCorr: lossCorr, + Gap: gap, + Duplicate: duplicate, + DuplicateCorr: duplicateCorr, + Jitter: jitter, + ReorderProb: reorderProb, + ReorderCorr: reorderCorr, + CorruptProb: corruptProb, + CorruptCorr: corruptCorr, + } +} + +// QdiscDel will delete a qdisc from the system. +// Equivalent to: `tc qdisc del $qdisc` +func QdiscDel(qdisc Qdisc) error { + return pkgHandle.QdiscDel(qdisc) +} + +// QdiscDel will delete a qdisc from the system. +// Equivalent to: `tc qdisc del $qdisc` +func (h *Handle) QdiscDel(qdisc Qdisc) error { + return h.qdiscModify(syscall.RTM_DELQDISC, 0, qdisc) +} + +// QdiscChange will change a qdisc in place +// Equivalent to: `tc qdisc change $qdisc` +// The parent and handle MUST NOT be changed. +func QdiscChange(qdisc Qdisc) error { + return pkgHandle.QdiscChange(qdisc) +} + +// QdiscChange will change a qdisc in place +// Equivalent to: `tc qdisc change $qdisc` +// The parent and handle MUST NOT be changed. +func (h *Handle) QdiscChange(qdisc Qdisc) error { + return h.qdiscModify(syscall.RTM_NEWQDISC, 0, qdisc) +} + +// QdiscReplace will replace a qdisc to the system. +// Equivalent to: `tc qdisc replace $qdisc` +// The handle MUST change. +func QdiscReplace(qdisc Qdisc) error { + return pkgHandle.QdiscReplace(qdisc) +} + +// QdiscReplace will replace a qdisc to the system. +// Equivalent to: `tc qdisc replace $qdisc` +// The handle MUST change. +func (h *Handle) QdiscReplace(qdisc Qdisc) error { + return h.qdiscModify( + syscall.RTM_NEWQDISC, + syscall.NLM_F_CREATE|syscall.NLM_F_REPLACE, + qdisc) +} + +// QdiscAdd will add a qdisc to the system. +// Equivalent to: `tc qdisc add $qdisc` +func QdiscAdd(qdisc Qdisc) error { + return pkgHandle.QdiscAdd(qdisc) +} + +// QdiscAdd will add a qdisc to the system. +// Equivalent to: `tc qdisc add $qdisc` +func (h *Handle) QdiscAdd(qdisc Qdisc) error { + return h.qdiscModify( + syscall.RTM_NEWQDISC, + syscall.NLM_F_CREATE|syscall.NLM_F_EXCL, + qdisc) +} + +func (h *Handle) qdiscModify(cmd, flags int, qdisc Qdisc) error { + req := h.newNetlinkRequest(cmd, flags|syscall.NLM_F_ACK) + base := qdisc.Attrs() + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: int32(base.LinkIndex), + Handle: base.Handle, + Parent: base.Parent, + } + req.AddData(msg) + + // When deleting don't bother building the rest of the netlink payload + if cmd != syscall.RTM_DELQDISC { + if err := qdiscPayload(req, qdisc); err != nil { + return err + } + } + + _, err := req.Execute(syscall.NETLINK_ROUTE, 0) + return err +} + +func qdiscPayload(req *nl.NetlinkRequest, qdisc Qdisc) error { + + req.AddData(nl.NewRtAttr(nl.TCA_KIND, nl.ZeroTerminated(qdisc.Type()))) + + options := nl.NewRtAttr(nl.TCA_OPTIONS, nil) + if prio, ok := qdisc.(*Prio); ok { + tcmap := nl.TcPrioMap{ + Bands: int32(prio.Bands), + Priomap: prio.PriorityMap, + } + options = nl.NewRtAttr(nl.TCA_OPTIONS, tcmap.Serialize()) + } else if tbf, ok := qdisc.(*Tbf); ok { + opt := nl.TcTbfQopt{} + opt.Rate.Rate = uint32(tbf.Rate) + opt.Peakrate.Rate = uint32(tbf.Peakrate) + opt.Limit = tbf.Limit + opt.Buffer = tbf.Buffer + nl.NewRtAttrChild(options, nl.TCA_TBF_PARMS, opt.Serialize()) + if tbf.Rate >= uint64(1<<32) { + nl.NewRtAttrChild(options, nl.TCA_TBF_RATE64, nl.Uint64Attr(tbf.Rate)) + } + if tbf.Peakrate >= uint64(1<<32) { + nl.NewRtAttrChild(options, nl.TCA_TBF_PRATE64, nl.Uint64Attr(tbf.Peakrate)) + } + if tbf.Peakrate > 0 { + nl.NewRtAttrChild(options, nl.TCA_TBF_PBURST, nl.Uint32Attr(tbf.Minburst)) + } + } else if htb, ok := qdisc.(*Htb); ok { + opt := nl.TcHtbGlob{} + opt.Version = htb.Version + opt.Rate2Quantum = htb.Rate2Quantum + opt.Defcls = htb.Defcls + // TODO: Handle Debug properly. For now default to 0 + opt.Debug = htb.Debug + opt.DirectPkts = htb.DirectPkts + nl.NewRtAttrChild(options, nl.TCA_HTB_INIT, opt.Serialize()) + // nl.NewRtAttrChild(options, nl.TCA_HTB_DIRECT_QLEN, opt.Serialize()) + } else if netem, ok := qdisc.(*Netem); ok { + opt := nl.TcNetemQopt{} + opt.Latency = netem.Latency + opt.Limit = netem.Limit + opt.Loss = netem.Loss + opt.Gap = netem.Gap + opt.Duplicate = netem.Duplicate + opt.Jitter = netem.Jitter + options = nl.NewRtAttr(nl.TCA_OPTIONS, opt.Serialize()) + // Correlation + corr := nl.TcNetemCorr{} + corr.DelayCorr = netem.DelayCorr + corr.LossCorr = netem.LossCorr + corr.DupCorr = netem.DuplicateCorr + + if corr.DelayCorr > 0 || corr.LossCorr > 0 || corr.DupCorr > 0 { + nl.NewRtAttrChild(options, nl.TCA_NETEM_CORR, corr.Serialize()) + } + // Corruption + corruption := nl.TcNetemCorrupt{} + corruption.Probability = netem.CorruptProb + corruption.Correlation = netem.CorruptCorr + if corruption.Probability > 0 { + nl.NewRtAttrChild(options, nl.TCA_NETEM_CORRUPT, corruption.Serialize()) + } + // Reorder + reorder := nl.TcNetemReorder{} + reorder.Probability = netem.ReorderProb + reorder.Correlation = netem.ReorderCorr + if reorder.Probability > 0 { + nl.NewRtAttrChild(options, nl.TCA_NETEM_REORDER, reorder.Serialize()) + } + } else if _, ok := qdisc.(*Ingress); ok { + // ingress filters must use the proper handle + if qdisc.Attrs().Parent != HANDLE_INGRESS { + return fmt.Errorf("Ingress filters must set Parent to HANDLE_INGRESS") + } + } + + req.AddData(options) + return nil +} + +// QdiscList gets a list of qdiscs in the system. +// Equivalent to: `tc qdisc show`. +// The list can be filtered by link. +func QdiscList(link Link) ([]Qdisc, error) { + return pkgHandle.QdiscList(link) +} + +// QdiscList gets a list of qdiscs in the system. +// Equivalent to: `tc qdisc show`. +// The list can be filtered by link. +func (h *Handle) QdiscList(link Link) ([]Qdisc, error) { + req := h.newNetlinkRequest(syscall.RTM_GETQDISC, syscall.NLM_F_DUMP) + index := int32(0) + if link != nil { + base := link.Attrs() + h.ensureIndex(base) + index = int32(base.Index) + } + msg := &nl.TcMsg{ + Family: nl.FAMILY_ALL, + Ifindex: index, + } + req.AddData(msg) + + msgs, err := req.Execute(syscall.NETLINK_ROUTE, syscall.RTM_NEWQDISC) + if err != nil { + return nil, err + } + + var res []Qdisc + for _, m := range msgs { + msg := nl.DeserializeTcMsg(m) + + attrs, err := nl.ParseRouteAttr(m[msg.Len():]) + if err != nil { + return nil, err + } + + // skip qdiscs from other interfaces + if link != nil && msg.Ifindex != index { + continue + } + + base := QdiscAttrs{ + LinkIndex: int(msg.Ifindex), + Handle: msg.Handle, + Parent: msg.Parent, + Refcnt: msg.Info, + } + var qdisc Qdisc + qdiscType := "" + for _, attr := range attrs { + switch attr.Attr.Type { + case nl.TCA_KIND: + qdiscType = string(attr.Value[:len(attr.Value)-1]) + switch qdiscType { + case "pfifo_fast": + qdisc = &PfifoFast{} + case "prio": + qdisc = &Prio{} + case "tbf": + qdisc = &Tbf{} + case "ingress": + qdisc = &Ingress{} + case "htb": + qdisc = &Htb{} + case "netem": + qdisc = &Netem{} + default: + qdisc = &GenericQdisc{QdiscType: qdiscType} + } + case nl.TCA_OPTIONS: + switch qdiscType { + case "pfifo_fast": + // pfifo returns TcPrioMap directly without wrapping it in rtattr + if err := parsePfifoFastData(qdisc, attr.Value); err != nil { + return nil, err + } + case "prio": + // prio returns TcPrioMap directly without wrapping it in rtattr + if err := parsePrioData(qdisc, attr.Value); err != nil { + return nil, err + } + case "tbf": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + if err := parseTbfData(qdisc, data); err != nil { + return nil, err + } + case "htb": + data, err := nl.ParseRouteAttr(attr.Value) + if err != nil { + return nil, err + } + if err := parseHtbData(qdisc, data); err != nil { + return nil, err + } + case "netem": + if err := parseNetemData(qdisc, attr.Value); err != nil { + return nil, err + } + + // no options for ingress + } + } + } + *qdisc.Attrs() = base + res = append(res, qdisc) + } + + return res, nil +} + +func parsePfifoFastData(qdisc Qdisc, value []byte) error { + pfifo := qdisc.(*PfifoFast) + tcmap := nl.DeserializeTcPrioMap(value) + pfifo.PriorityMap = tcmap.Priomap + pfifo.Bands = uint8(tcmap.Bands) + return nil +} + +func parsePrioData(qdisc Qdisc, value []byte) error { + prio := qdisc.(*Prio) + tcmap := nl.DeserializeTcPrioMap(value) + prio.PriorityMap = tcmap.Priomap + prio.Bands = uint8(tcmap.Bands) + return nil +} + +func parseHtbData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { + native = nl.NativeEndian() + htb := qdisc.(*Htb) + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_HTB_INIT: + opt := nl.DeserializeTcHtbGlob(datum.Value) + htb.Version = opt.Version + htb.Rate2Quantum = opt.Rate2Quantum + htb.Defcls = opt.Defcls + htb.Debug = opt.Debug + htb.DirectPkts = opt.DirectPkts + case nl.TCA_HTB_DIRECT_QLEN: + // TODO + //htb.DirectQlen = native.uint32(datum.Value) + } + } + return nil +} + +func parseNetemData(qdisc Qdisc, value []byte) error { + netem := qdisc.(*Netem) + opt := nl.DeserializeTcNetemQopt(value) + netem.Latency = opt.Latency + netem.Limit = opt.Limit + netem.Loss = opt.Loss + netem.Gap = opt.Gap + netem.Duplicate = opt.Duplicate + netem.Jitter = opt.Jitter + data, err := nl.ParseRouteAttr(value[nl.SizeofTcNetemQopt:]) + if err != nil { + return err + } + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_NETEM_CORR: + opt := nl.DeserializeTcNetemCorr(datum.Value) + netem.DelayCorr = opt.DelayCorr + netem.LossCorr = opt.LossCorr + netem.DuplicateCorr = opt.DupCorr + case nl.TCA_NETEM_CORRUPT: + opt := nl.DeserializeTcNetemCorrupt(datum.Value) + netem.CorruptProb = opt.Probability + netem.CorruptCorr = opt.Correlation + case nl.TCA_NETEM_REORDER: + opt := nl.DeserializeTcNetemReorder(datum.Value) + netem.ReorderProb = opt.Probability + netem.ReorderCorr = opt.Correlation + } + } + return nil +} + +func parseTbfData(qdisc Qdisc, data []syscall.NetlinkRouteAttr) error { + native = nl.NativeEndian() + tbf := qdisc.(*Tbf) + for _, datum := range data { + switch datum.Attr.Type { + case nl.TCA_TBF_PARMS: + opt := nl.DeserializeTcTbfQopt(datum.Value) + tbf.Rate = uint64(opt.Rate.Rate) + tbf.Peakrate = uint64(opt.Peakrate.Rate) + tbf.Limit = opt.Limit + tbf.Buffer = opt.Buffer + case nl.TCA_TBF_RATE64: + tbf.Rate = native.Uint64(datum.Value[0:8]) + case nl.TCA_TBF_PRATE64: + tbf.Peakrate = native.Uint64(datum.Value[0:8]) + case nl.TCA_TBF_PBURST: + tbf.Minburst = native.Uint32(datum.Value[0:4]) + } + } + return nil +} + +const ( + TIME_UNITS_PER_SEC = 1000000 +) + +var ( + tickInUsec float64 + clockFactor float64 + hz float64 +) + +func initClock() { + data, err := ioutil.ReadFile("/proc/net/psched") + if err != nil { + return + } + parts := strings.Split(strings.TrimSpace(string(data)), " ") + if len(parts) < 3 { + return + } + var vals [3]uint64 + for i := range vals { + val, err := strconv.ParseUint(parts[i], 16, 32) + if err != nil { + return + } + vals[i] = val + } + // compatibility + if vals[2] == 1000000000 { + vals[0] = vals[1] + } + clockFactor = float64(vals[2]) / TIME_UNITS_PER_SEC + tickInUsec = float64(vals[0]) / float64(vals[1]) * clockFactor + hz = float64(vals[0]) +} + +func TickInUsec() float64 { + if tickInUsec == 0.0 { + initClock() + } + return tickInUsec +} + +func ClockFactor() float64 { + if clockFactor == 0.0 { + initClock() + } + return clockFactor +} + +func Hz() float64 { + if hz == 0.0 { + initClock() + } + return hz +} + +func time2Tick(time uint32) uint32 { + return uint32(float64(time) * TickInUsec()) +} + +func tick2Time(tick uint32) uint32 { + return uint32(float64(tick) / TickInUsec()) +} + +func time2Ktime(time uint32) uint32 { + return uint32(float64(time) * ClockFactor()) +} + +func ktime2Time(ktime uint32) uint32 { + return uint32(float64(ktime) / ClockFactor()) +} + +func burst(rate uint64, buffer uint32) uint32 { + return uint32(float64(rate) * float64(tick2Time(buffer)) / TIME_UNITS_PER_SEC) +} + +func latency(rate uint64, limit, buffer uint32) float64 { + return TIME_UNITS_PER_SEC*(float64(limit)/float64(rate)) - float64(tick2Time(buffer)) +} + +func Xmittime(rate uint64, size uint32) float64 { + return TickInUsec() * TIME_UNITS_PER_SEC * (float64(size) / float64(rate)) +} |