// NOTE: These functions are excerpted from a larger package. They assume the
// enclosing package imports fmt, math, and sort from the standard library,
// a protocol buffer runtime as proto, and the project's lex, store, and
// transformer packages.

// joinMacWithFlowId joins MAC addresses (database 0) against flow records
// (database 1) grouped by session and local IP, emitting one record per flow
// with an open-ended (MaxInt64) end timestamp.
func joinMacWithFlowId(inputChan, outputChan chan *store.Record) {
	var (
		session SessionKey
		localIp []byte
	)
	grouper := transformer.GroupRecords(inputChan, &session, &localIp)
	for grouper.NextGroup() {
		var macAddress []byte
		for grouper.NextRecord() {
			record := grouper.Read()
			switch record.DatabaseIndex {
			case 0:
				lex.DecodeOrDie(record.Value, &macAddress)
			case 1:
				if macAddress != nil {
					var (
						remoteIp       []byte
						sequenceNumber int32
						timestamp      int64
						flowId         int32
					)
					lex.DecodeOrDie(record.Key, &sequenceNumber, &remoteIp, &timestamp, &flowId)
					outputChan <- &store.Record{
						Key: lex.EncodeOrDie(&session, macAddress, remoteIp, timestamp, int64(math.MaxInt64), sequenceNumber, flowId),
					}
				}
			}
		}
	}
}
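// Many functions in this section group on a SessionKey and read its NodeId,
// AnonymizationContext, and SessionId fields directly. The type is defined
// elsewhere in the package; a sketch of its likely shape (the field types
// are inferred from the call sites and are assumptions, not the project's
// confirmed definition):
//
//	type SessionKey struct {
//		NodeId               string
//		AnonymizationContext string
//		SessionId            int64
//	}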
// joinDomainsWithWhitelist matches each observed domain (database 1) against
// a sorted whitelist (database 0), emitting one record for every whitelisted
// domain that is a suffix of the observed domain on a label boundary.
func joinDomainsWithWhitelist(inputChan, outputChan chan *store.Record) {
	var session SessionKey
	grouper := transformer.GroupRecords(inputChan, &session)
	for grouper.NextGroup() {
		var whitelist []string
		for grouper.NextRecord() {
			record := grouper.Read()
			switch record.DatabaseIndex {
			case 0:
				lex.DecodeOrDie(record.Value, &whitelist)
				sort.Sort(sort.StringSlice(whitelist))
			case 1:
				if whitelist == nil {
					continue
				}
				var domain string
				remainder := lex.DecodeOrDie(record.Key, &domain)
				// Consider every suffix of the domain that begins at a label
				// boundary: the whole domain, or anything right after a dot.
				for i := 0; i < len(domain); i++ {
					if i > 0 && domain[i-1] != '.' {
						continue
					}
					idx := sort.SearchStrings(whitelist, domain[i:])
					if idx >= len(whitelist) || whitelist[idx] != domain[i:] {
						continue
					}
					outputChan <- &store.Record{
						Key: lex.Concatenate(grouper.CurrentGroupPrefix, remainder, lex.EncodeOrDie(whitelist[idx])),
					}
				}
			}
		}
	}
}
func summarizeFilesystemUsage(inputChan, outputChan chan *store.Record) {
	var filesystem string
	var timestamp int64
	grouper := transformer.GroupRecords(inputChan, &filesystem, &timestamp)
	for grouper.NextGroup() {
		usage := make(map[string]int64)
		for grouper.NextRecord() {
			record := grouper.Read()
			var node string
			lex.DecodeOrDie(record.Key, &node)
			var used int64
			lex.DecodeOrDie(record.Value, &used)
			if used > usage[node] {
				usage[node] = used
			}
		}
		for node, used := range usage {
			outputChan <- &store.Record{
				Key:   lex.EncodeOrDie(filesystem, timestamp, node),
				Value: lex.EncodeOrDie(used),
			}
		}
	}
}
func joinAddressIdsWithMacAddresses(inputChan, outputChan chan *store.Record) {
	var (
		session   SessionKey
		addressId int32
	)
	grouper := transformer.GroupRecords(inputChan, &session, &addressId)
	for grouper.NextGroup() {
		var macAddress []byte
		for grouper.NextRecord() {
			record := grouper.Read()
			switch record.DatabaseIndex {
			case 0:
				macAddress = record.Value
			case 1:
				if macAddress != nil {
					var unusedSequenceNumber int32
					remainder := lex.DecodeOrDie(record.Key, &unusedSequenceNumber)
					outputChan <- &store.Record{
						Key: lex.Concatenate(lex.EncodeOrDie(&session), macAddress, remainder),
					}
				}
			}
		}
	}
}
// flattenLookupsToNodeMacAndTimestamp rolls lookup counts up to one total per
// (node, MAC, domain, hour). Records within a session arrive every 30 seconds,
// so a record's wall-clock time is the session start (sessionId) plus 30
// seconds per sequence number, truncated to the hour.
func flattenLookupsToNodeMacAndTimestamp(inputChan, outputChan chan *store.Record) {
	var nodeId, macAddress, domain string
	grouper := transformer.GroupRecords(inputChan, &nodeId, &macAddress, &domain)
	for grouper.NextGroup() {
		totalCounts := make(map[int64]int64)
		for grouper.NextRecord() {
			record := grouper.Read()
			var (
				anonymizationContext string
				sessionId            int64
				sequenceNumber       int32
			)
			lex.DecodeOrDie(record.Key, &anonymizationContext, &sessionId, &sequenceNumber)
			var count int64
			lex.DecodeOrDie(record.Value, &count)
			timestamp := truncateTimestampToHour(sessionId + convertSecondsToMicroseconds(30)*int64(sequenceNumber))
			totalCounts[timestamp] += count
		}
		for timestamp, totalCount := range totalCounts {
			outputChan <- &store.Record{
				Key:   lex.EncodeOrDie(nodeId, macAddress, domain, timestamp),
				Value: lex.EncodeOrDie(totalCount),
			}
		}
	}
}
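// The function above relies on small time helpers that are not shown in this
// section (truncateTimestampToDay is used later by summarizeSizesPerDay). A
// minimal sketch consistent with the call sites, assuming timestamps are
// Unix microseconds; the unit is an assumption, and if the pipeline stores
// seconds the scaling factors change accordingly.
const microsecondsPerSecond int64 = 1e6

// convertSecondsToMicroseconds scales a duration in seconds to microseconds.
func convertSecondsToMicroseconds(seconds int64) int64 {
	return seconds * microsecondsPerSecond
}

// truncateTimestampToHour rounds a timestamp down to the start of its hour.
func truncateTimestampToHour(timestamp int64) int64 {
	return timestamp - timestamp%(3600*microsecondsPerSecond)
}

// truncateTimestampToDay rounds a timestamp down to the start of its day.
func truncateTimestampToDay(timestamp int64) int64 {
	return timestamp - timestamp%(86400*microsecondsPerSecond)
}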
func joinMacWithLookups(inputChan, outputChan chan *store.Record) {
	var (
		session   SessionKey
		addressId int32
	)
	grouper := transformer.GroupRecords(inputChan, &session, &addressId)
	for grouper.NextGroup() {
		var macAddress []byte
		for grouper.NextRecord() {
			record := grouper.Read()
			switch record.DatabaseIndex {
			case 0:
				lex.DecodeOrDie(record.Value, &macAddress)
			case 1:
				if macAddress != nil {
					var (
						sequenceNumber int32
						domain         string
					)
					lex.DecodeOrDie(record.Key, &sequenceNumber, &domain)
					outputChan <- &store.Record{
						Key:   lex.EncodeOrDie(session.NodeId, macAddress, domain, session.AnonymizationContext, session.SessionId, sequenceNumber),
						Value: record.Value,
					}
				}
			}
		}
	}
}
// aggregateStatisticsReducer sums the per-record AggregateStatistics protocol
// buffers for each node into a single aggregate per node.
func aggregateStatisticsReducer(inputChan, outputChan chan *store.Record) {
	var nodeId []byte
	grouper := transformer.GroupRecords(inputChan, &nodeId)
	for grouper.NextGroup() {
		aggregateStatistics := newAggregateStatistics()
		for grouper.NextRecord() {
			record := grouper.Read()
			var statistics AggregateStatistics
			if err := proto.Unmarshal(record.Value, &statistics); err != nil {
				panic(err)
			}
			*aggregateStatistics.Traces += *statistics.Traces
			*aggregateStatistics.Packets += *statistics.Packets
			*aggregateStatistics.PacketSeriesDropped += *statistics.PacketSeriesDropped
			*aggregateStatistics.PcapDropped += *statistics.PcapDropped
			*aggregateStatistics.InterfaceDropped += *statistics.InterfaceDropped
			*aggregateStatistics.Flows += *statistics.Flows
			*aggregateStatistics.DroppedFlows += *statistics.DroppedFlows
			*aggregateStatistics.Bytes += *statistics.Bytes
		}
		encodedStatistics, err := proto.Marshal(aggregateStatistics)
		if err != nil {
			panic(err)
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(nodeId),
			Value: encodedStatistics,
		}
	}
}
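// newAggregateStatistics is defined elsewhere in the package. Both
// aggregateStatisticsReducer above and aggregateStatisticsReduceBySession
// below dereference every counter field unconditionally, so the constructor
// must return a message whose counters are all non-nil. A plausible sketch,
// with the field set inferred from the usage here (the real constructor may
// differ):
func newAggregateStatistics() *AggregateStatistics {
	return &AggregateStatistics{
		Traces:              proto.Int64(0),
		Packets:             proto.Int64(0),
		PacketSeriesDropped: proto.Int64(0),
		PcapDropped:         proto.Int64(0),
		InterfaceDropped:    proto.Int64(0),
		Flows:               proto.Int64(0),
		DroppedFlows:        proto.Int64(0),
		Bytes:               proto.Int64(0),
	}
}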
// inferReboots detects reboots from a node's uptime series: whenever uptime
// decreases, the node must have rebooted. Within a boot cycle,
// timestamp - uptime estimates the boot time; we keep the largest (latest)
// estimate and emit it once the cycle ends.
func inferReboots(inputChan, outputChan chan *store.Record) {
	var node string
	grouper := transformer.GroupRecords(inputChan, &node)
	for grouper.NextGroup() {
		lastUptime := int64(-1)
		maxReboot := int64(-1)
		for grouper.NextRecord() {
			record := grouper.Read()
			var timestamp int64
			lex.DecodeOrDie(record.Key, &timestamp)
			var uptime int64
			lex.DecodeOrDie(record.Value, &uptime)
			if lastUptime >= 0 && lastUptime > uptime {
				// Uptime went backwards: flush the boot-time estimate for
				// the cycle that just ended.
				if maxReboot > -1 {
					outputChan <- &store.Record{
						Key: lex.EncodeOrDie(node, maxReboot),
					}
				}
				maxReboot = int64(-1)
			}
			reboot := timestamp - uptime
			if maxReboot < reboot {
				maxReboot = reboot
			}
			lastUptime = uptime
		}
		if maxReboot > -1 {
			outputChan <- &store.Record{
				Key: lex.EncodeOrDie(node, maxReboot),
			}
		}
	}
}
func joinMacAndSizes(inputChan, outputChan chan *store.Record) {
	var session SessionKey
	var flowId int32
	grouper := transformer.GroupRecords(inputChan, &session, &flowId)
	for grouper.NextGroup() {
		var currentMacAddresses [][]byte
		for grouper.NextRecord() {
			record := grouper.Read()
			if record.DatabaseIndex == 0 {
				lex.DecodeOrDie(record.Value, &currentMacAddresses)
				continue
			}
			if currentMacAddresses == nil {
				continue
			}
			var sequenceNumber int32
			lex.DecodeOrDie(record.Key, &sequenceNumber)
			var timestamps, sizes []int64
			lex.DecodeOrDie(record.Value, &timestamps, &sizes)
			if len(timestamps) != len(sizes) {
				panic(fmt.Errorf("timestamps and sizes must be the same size"))
			}
			for _, currentMacAddress := range currentMacAddresses {
				for idx, timestamp := range timestamps {
					outputChan <- &store.Record{
						Key:   lex.EncodeOrDie(&session, currentMacAddress, timestamp, flowId, sequenceNumber),
						Value: lex.EncodeOrDie(sizes[idx]),
					}
				}
			}
		}
	}
}
// summarizeSizesByTimestamp computes size percentiles for each
// (experiment, node, timestamp) group. The sampler tracks 101 quantiles (the
// 0th through 100th percentiles) and we keep only the percentiles listed in
// quantilesToKeep.
func summarizeSizesByTimestamp(inputChan, outputChan chan *store.Record) {
	quantileComputer := NewQuantileSample(101)
	quantilesToKeep := []int{0, 1, 5, 10, 25, 50, 75, 90, 95, 99, 100}
	var experiment, node string
	var timestamp int64
	grouper := transformer.GroupRecords(inputChan, &experiment, &node, &timestamp)
	for grouper.NextGroup() {
		for grouper.NextRecord() {
			record := grouper.Read()
			var statsValue StatsValue
			lex.DecodeOrDie(record.Value, &statsValue)
			quantileComputer.Append(statsValue.Size)
		}
		count := int64(quantileComputer.Count())
		quantiles := quantileComputer.Quantiles()
		quantileComputer.Reset()
		statistics := []interface{}{count}
		// Index by the percentile values themselves; ranging over
		// quantilesToKeep's indices would select the first 11 percentiles
		// rather than the intended ones.
		for _, quantile := range quantilesToKeep {
			statistics = append(statistics, quantiles[quantile])
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(experiment, node, timestamp),
			Value: lex.EncodeOrDie(statistics...),
		}
	}
}
func summarizeInterarrivalTimes(inputChan, outputChan chan *store.Record) {
	quantileComputer := NewQuantileSample(21)
	var experiment, node string
	grouper := transformer.GroupRecords(inputChan, &experiment, &node)
	for grouper.NextGroup() {
		for grouper.NextRecord() {
			record := grouper.Read()
			var timestamp int64
			lex.DecodeOrDie(record.Key, &timestamp)
			quantileComputer.Append(timestamp)
		}
		count := int64(quantileComputer.Count())
		quantiles := quantileComputer.Quantiles()
		statistics := []interface{}{count}
		for _, q := range quantiles {
			statistics = append(statistics, q)
		}
		quantileComputer.Reset()
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(experiment, node),
			Value: lex.EncodeOrDie(statistics...),
		}
	}
}
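// NewQuantileSample is provided elsewhere in the project. Below is a minimal
// stand-in that matches the Append/Count/Quantiles/Reset calls above: it
// keeps every value and computes exact quantiles on demand. The real
// implementation may subsample to bound memory; this sketch only pins down
// the interface. NewQuantileSample(n) yields n evenly spaced order
// statistics, from the minimum (index 0) to the maximum (index n-1).
type QuantileSample struct {
	buckets int
	values  []int64
}

func NewQuantileSample(buckets int) *QuantileSample {
	return &QuantileSample{buckets: buckets}
}

// Append adds one observation to the sample.
func (s *QuantileSample) Append(value int64) {
	s.values = append(s.values, value)
}

// Count reports the number of observations appended since the last Reset.
func (s *QuantileSample) Count() int {
	return len(s.values)
}

// Quantiles returns s.buckets evenly spaced order statistics of the sample.
func (s *QuantileSample) Quantiles() []int64 {
	quantiles := make([]int64, s.buckets)
	if len(s.values) == 0 {
		return quantiles
	}
	sorted := append([]int64(nil), s.values...)
	sort.Slice(sorted, func(i, j int) bool { return sorted[i] < sorted[j] })
	for i := range quantiles {
		quantiles[i] = sorted[i*(len(sorted)-1)/(s.buckets-1)]
	}
	return quantiles
}

// Reset discards all observations so the sampler can be reused.
func (s *QuantileSample) Reset() {
	s.values = nil
}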
func joinMacAndFlowId(inputChan, outputChan chan *store.Record) {
	var session SessionKey
	var ipAddress []byte
	grouper := transformer.GroupRecords(inputChan, &session, &ipAddress)
	for grouper.NextGroup() {
		var currentMacAddress []byte
		for grouper.NextRecord() {
			record := grouper.Read()
			if record.DatabaseIndex == 0 {
				currentMacAddress = record.Value
				continue
			}
			if currentMacAddress == nil {
				continue
			}
			var sequenceNumber int32
			lex.DecodeOrDie(record.Key, &sequenceNumber)
			var flowIds []int32
			lex.DecodeOrDie(record.Value, &flowIds)
			for _, flowId := range flowIds {
				outputChan <- &store.Record{
					Key: lex.Concatenate(lex.EncodeOrDie(&session, flowId, sequenceNumber), currentMacAddress),
				}
			}
		}
	}
}
// joinDomainsWithSizes pairs each flow's parallel (domain, MAC) lists
// (database 0) with the flow's size records (database 1), emitting one
// record per domain with its corresponding MAC address.
func joinDomainsWithSizes(inputChan, outputChan chan *store.Record) {
	var (
		session SessionKey
		flowId  int32
	)
	grouper := transformer.GroupRecords(inputChan, &session, &flowId)
	for grouper.NextGroup() {
		var domains, macAddresses [][]byte
		for grouper.NextRecord() {
			record := grouper.Read()
			switch record.DatabaseIndex {
			case 0:
				lex.DecodeOrDie(record.Value, &domains, &macAddresses)
			case 1:
				if domains != nil && macAddresses != nil {
					var (
						sequenceNumber int32
						timestamp      int64
					)
					lex.DecodeOrDie(record.Key, &sequenceNumber, &timestamp)
					for idx, domain := range domains {
						outputChan <- &store.Record{
							Key:   lex.EncodeOrDie(session.NodeId, domain, timestamp, macAddresses[idx], session.AnonymizationContext, session.SessionId, flowId, sequenceNumber),
							Value: record.Value,
						}
					}
				}
			}
		}
	}
}
// aggregateStatisticsReduceBySession sums statistics across a session. The
// pcap and interface drop counters are cumulative 32-bit counters reported
// by the node, so instead of summing them we track the running maximum and
// add math.MaxUint32 to a baseline each time the counter wraps around.
func aggregateStatisticsReduceBySession(inputChan, outputChan chan *store.Record) {
	var session SessionKey
	grouper := transformer.GroupRecords(inputChan, &session)
	for grouper.NextGroup() {
		aggregateStatistics := newAggregateStatistics()
		var pcapDropped, interfaceDropped int64
		var lastPcapDropped, lastInterfaceDropped int64
		var pcapDroppedBaseline, interfaceDroppedBaseline int64
		for grouper.NextRecord() {
			record := grouper.Read()
			var statistics AggregateStatistics
			if err := proto.Unmarshal(record.Value, &statistics); err != nil {
				panic(err)
			}
			if lastPcapDropped > *statistics.PcapDropped {
				// The 32-bit counter wrapped around.
				pcapDroppedBaseline += math.MaxUint32
				pcapDropped = 0
			}
			lastPcapDropped = *statistics.PcapDropped
			pcapDropped = maxInt64(pcapDropped, *statistics.PcapDropped)
			if lastInterfaceDropped > *statistics.InterfaceDropped {
				interfaceDroppedBaseline += math.MaxUint32
				interfaceDropped = 0
			}
			lastInterfaceDropped = *statistics.InterfaceDropped
			interfaceDropped = maxInt64(interfaceDropped, *statistics.InterfaceDropped)
			*aggregateStatistics.Traces += *statistics.Traces
			*aggregateStatistics.Packets += *statistics.Packets
			*aggregateStatistics.PacketSeriesDropped += *statistics.PacketSeriesDropped
			*aggregateStatistics.Flows += *statistics.Flows
			*aggregateStatistics.DroppedFlows += *statistics.DroppedFlows
			*aggregateStatistics.Bytes += *statistics.Bytes
		}
		*aggregateStatistics.PcapDropped = pcapDroppedBaseline + pcapDropped
		*aggregateStatistics.InterfaceDropped = interfaceDroppedBaseline + interfaceDropped
		encodedStatistics, err := proto.Marshal(aggregateStatistics)
		if err != nil {
			panic(err)
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(&session),
			Value: encodedStatistics,
		}
	}
}
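// maxInt64 and minInt64 are trivial helpers used by the reducer above and by
// joinARecordsWithCnameRecords below; this codebase predates a built-in
// min/max for int64. Their obvious definitions:
func maxInt64(a, b int64) int64 {
	if a > b {
		return a
	}
	return b
}

func minInt64(a, b int64) int64 {
	if a < b {
		return a
	}
	return b
}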
func flattenLookupsToNodeAndMac(inputChan, outputChan chan *store.Record) {
	var nodeId, macAddress, domain string
	grouper := transformer.GroupRecords(inputChan, &nodeId, &macAddress, &domain)
	for grouper.NextGroup() {
		var totalCount int64
		for grouper.NextRecord() {
			record := grouper.Read()
			var count int64
			lex.DecodeOrDie(record.Value, &count)
			totalCount += count
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(nodeId, macAddress, domain),
			Value: lex.EncodeOrDie(totalCount),
		}
	}
}
func reduceBytesPerDevice(inputChan, outputChan chan *store.Record) {
	var nodeId, macAddress []byte
	var timestamp int64
	grouper := transformer.GroupRecords(inputChan, &nodeId, &macAddress, &timestamp)
	for grouper.NextGroup() {
		var totalSize int64
		for grouper.NextRecord() {
			record := grouper.Read()
			var size int64
			lex.DecodeOrDie(record.Value, &size)
			totalSize += size
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(nodeId, macAddress, timestamp),
			Value: lex.EncodeOrDie(totalSize),
		}
	}
}
func flattenMacAddresses(inputChan, outputChan chan *store.Record) {
	var session SessionKey
	var flowId, sequenceNumber int32
	grouper := transformer.GroupRecords(inputChan, &session, &flowId, &sequenceNumber)
	for grouper.NextGroup() {
		macAddresses := [][]byte{}
		for grouper.NextRecord() {
			record := grouper.Read()
			var macAddress []byte
			lex.DecodeOrDie(record.Key, &macAddress)
			macAddresses = append(macAddresses, macAddress)
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(&session, flowId, sequenceNumber),
			Value: lex.EncodeOrDie(macAddresses),
		}
	}
}
func reduceBytesPerDeviceSession(inputChan, outputChan chan *store.Record) {
	var session SessionKey
	var macAddress []byte
	var timestamp int64
	grouper := transformer.GroupRecords(inputChan, &session, &macAddress, &timestamp)
	for grouper.NextGroup() {
		var totalSize int64
		for grouper.NextRecord() {
			record := grouper.Read()
			var size int64
			lex.DecodeOrDie(record.Value, &size)
			totalSize += size
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(session.NodeId, macAddress, timestamp, session.AnonymizationContext, session.SessionId),
			Value: lex.EncodeOrDie(totalSize),
		}
	}
}
func flattenIntoBytesPerTimestamp(inputChan, outputChan chan *store.Record) {
	var (
		nodeId, domain []byte
		timestamp      int64
	)
	grouper := transformer.GroupRecords(inputChan, &nodeId, &domain, &timestamp)
	for grouper.NextGroup() {
		var totalSize int64
		for grouper.NextRecord() {
			record := grouper.Read()
			var size int64
			lex.DecodeOrDie(record.Value, &size)
			totalSize += size
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(nodeId, domain, timestamp),
			Value: lex.EncodeOrDie(totalSize),
		}
	}
}
func summarizeSizesPerDay(inputChan, outputChan chan *store.Record) {
	var experiment, node string
	grouper := transformer.GroupRecords(inputChan, &experiment, &node)
	for grouper.NextGroup() {
		sizePerDay := make(map[int64]int64)
		for grouper.NextRecord() {
			record := grouper.Read()
			var statsValue StatsValue
			lex.DecodeOrDie(record.Value, &statsValue)
			roundedTimestamp := truncateTimestampToDay(statsValue.ReceivedTimestamp)
			sizePerDay[roundedTimestamp] += statsValue.Size
		}
		for timestamp, size := range sizePerDay {
			outputChan <- &store.Record{
				Key:   lex.EncodeOrDie(experiment, node, timestamp),
				Value: lex.EncodeOrDie(size),
			}
		}
	}
}
func groupDomainsAndMacAddresses(inputChan, outputChan chan *store.Record) {
	var (
		session                SessionKey
		flowId, sequenceNumber int32
	)
	grouper := transformer.GroupRecords(inputChan, &session, &flowId, &sequenceNumber)
	for grouper.NextGroup() {
		var domains, macAddresses [][]byte
		for grouper.NextRecord() {
			record := grouper.Read()
			var domain, macAddress []byte
			lex.DecodeOrDie(record.Key, &domain, &macAddress)
			domains = append(domains, domain)
			macAddresses = append(macAddresses, macAddress)
		}
		outputChan <- &store.Record{
			Key:   grouper.CurrentGroupPrefix,
			Value: lex.EncodeOrDie(domains, macAddresses),
		}
	}
}
// joinWhitelistedDomainsWithFlows matches flow start times (database 1)
// against the [start, end] validity intervals of whitelisted domain mappings
// (database 0) for the same session, MAC address, and remote IP.
func joinWhitelistedDomainsWithFlows(inputChan, outputChan chan *store.Record) {
	var (
		session              SessionKey
		macAddress, remoteIp []byte
	)
	grouper := transformer.GroupRecords(inputChan, &session, &macAddress, &remoteIp)
	for grouper.NextGroup() {
		type timestampsAndDomain struct {
			start, end int64
			domain     []byte
		}
		var domains []*timestampsAndDomain
		for grouper.NextRecord() {
			record := grouper.Read()
			switch record.DatabaseIndex {
			case 0:
				var newEntry timestampsAndDomain
				lex.DecodeOrDie(record.Key, &newEntry.start, &newEntry.end, &newEntry.domain)
				domains = append(domains, &newEntry)
			case 1:
				if domains != nil {
					var (
						timestamp, unusedInfinity int64
						sequenceNumber, flowId    int32
					)
					lex.DecodeOrDie(record.Key, &timestamp, &unusedInfinity, &sequenceNumber, &flowId)
					for _, entry := range domains {
						if entry.start <= timestamp && entry.end >= timestamp {
							outputChan <- &store.Record{
								Key: lex.EncodeOrDie(&session, flowId, sequenceNumber, entry.domain, macAddress),
							}
						}
					}
				}
			}
		}
	}
}
func computeInterarrivalTimes(inputChan, outputChan chan *store.Record) {
	var experiment, node string
	grouper := transformer.GroupRecords(inputChan, &experiment, &node)
	for grouper.NextGroup() {
		var lastTimestamp int64
		for grouper.NextRecord() {
			record := grouper.Read()
			var timestamp int64
			var filename string
			lex.DecodeOrDie(record.Key, &timestamp, &filename)
			if lastTimestamp > 0 {
				interarrivalTime := timestamp - lastTimestamp
				outputChan <- &store.Record{
					Key: lex.EncodeOrDie(experiment, node, interarrivalTime, filename),
				}
			}
			lastTimestamp = timestamp
		}
	}
}
func detectChangedPackageVersions(inputChan, outputChan chan *store.Record) {
	var node, packageName string
	grouper := transformer.GroupRecords(inputChan, &node, &packageName)
	for grouper.NextGroup() {
		var lastVersion string
		for grouper.NextRecord() {
			record := grouper.Read()
			var timestamp int64
			lex.DecodeOrDie(record.Key, &timestamp)
			var version string
			lex.DecodeOrDie(record.Value, &version)
			if version != lastVersion {
				outputChan <- &store.Record{
					Key:   lex.EncodeOrDie(node, packageName, timestamp),
					Value: lex.EncodeOrDie(version),
				}
			}
			lastVersion = version
		}
	}
}
// joinARecordsWithCnameRecords performs an interval join between A records
// (database 0) and CNAME records (database 1) in the same group. Each record
// contributes a start event and an end event; sweeping the events in
// timestamp order maintains the sets of currently valid A and CNAME values,
// and whenever a value expires we emit its overlap with every live value of
// the other type.
func joinARecordsWithCnameRecords(inputChan, outputChan chan *store.Record) {
	var (
		session            SessionKey
		macAddress, domain []byte
		anonymized         bool
	)
	grouper := transformer.GroupRecords(inputChan, &session, &macAddress, &domain, &anonymized)
	for grouper.NextGroup() {
		var allRecords dnsRecords
		for grouper.NextRecord() {
			record := grouper.Read()
			newStartDnsRecord := dnsRecord{startEvent: true, aRecord: record.DatabaseIndex == 0}
			newEndDnsRecord := dnsRecord{startEvent: false, aRecord: record.DatabaseIndex == 0}
			lex.DecodeOrDie(record.Key, &newStartDnsRecord.timestamp, &newEndDnsRecord.timestamp, &newStartDnsRecord.value)
			newEndDnsRecord.value = newStartDnsRecord.value
			allRecords = append(allRecords, &newStartDnsRecord)
			allRecords = append(allRecords, &newEndDnsRecord)
		}
		sort.Sort(allRecords)

		// For each live value, track its earliest start time and a reference
		// count, since the same value can be live from overlapping records.
		currentAValues := make(map[string]int64)
		currentCnameValues := make(map[string]int64)
		currentACounts := make(map[string]int)
		currentCnameCounts := make(map[string]int)
		for _, record := range allRecords {
			if record.aRecord {
				if record.startEvent {
					timestamp := record.timestamp
					if oldTimestamp, ok := currentAValues[record.value]; ok {
						timestamp = minInt64(timestamp, oldTimestamp)
					}
					currentAValues[record.value] = timestamp
					currentACounts[record.value]++
				} else {
					currentACounts[record.value]--
					if currentACounts[record.value] == 0 {
						// The last interval for this IP ended: emit its
						// overlap with every currently live CNAME.
						startTimestamp := currentAValues[record.value]
						delete(currentAValues, record.value)
						for cname, timestamp := range currentCnameValues {
							outputChan <- &store.Record{
								Key: lex.EncodeOrDie(&session, cname, macAddress, record.value, maxInt64(startTimestamp, timestamp), record.timestamp),
							}
						}
					}
				}
			} else {
				if record.startEvent {
					timestamp := record.timestamp
					if oldTimestamp, ok := currentCnameValues[record.value]; ok {
						timestamp = minInt64(timestamp, oldTimestamp)
					}
					currentCnameValues[record.value] = timestamp
					currentCnameCounts[record.value]++
				} else {
					currentCnameCounts[record.value]--
					if currentCnameCounts[record.value] == 0 {
						// The last interval for this CNAME ended: emit its
						// overlap with every currently live A record.
						startTimestamp := currentCnameValues[record.value]
						delete(currentCnameValues, record.value)
						for ip, timestamp := range currentAValues {
							outputChan <- &store.Record{
								Key: lex.EncodeOrDie(&session, record.value, macAddress, ip, maxInt64(startTimestamp, timestamp), record.timestamp),
							}
						}
					}
				}
			}
		}
	}
}
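// The dnsRecord event type and its sort.Interface are defined elsewhere in
// the package; the sweep above only needs events ordered by timestamp. A
// minimal sketch follows, with one assumption called out: at equal
// timestamps, start events sort before end events, so intervals sharing an
// endpoint count as overlapping. The project's actual tie-breaking rule may
// differ. The field names match the usage above.
type dnsRecord struct {
	timestamp  int64
	value      string
	startEvent bool
	aRecord    bool
}

type dnsRecords []*dnsRecord

func (records dnsRecords) Len() int      { return len(records) }
func (records dnsRecords) Swap(i, j int) { records[i], records[j] = records[j], records[i] }
func (records dnsRecords) Less(i, j int) bool {
	if records[i].timestamp != records[j].timestamp {
		return records[i].timestamp < records[j].timestamp
	}
	return records[i].startEvent && !records[j].startEvent
}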