// joinMacWithFlowId joins flow records (database index 1) with the MAC
// address record (database index 0) of the same (session, local IP) group.
// For each flow seen after the MAC address, it emits a key-only record of
// (session, MAC, remote IP, timestamp, math.MaxInt64, sequence number,
// flow ID); math.MaxInt64 presumably serves as an open-ended end timestamp
// for a later interval join.
func joinMacWithFlowId(inputChan, outputChan chan *store.Record) {
	var (
		session SessionKey
		localIp []byte
	)
	grouper := transformer.GroupRecords(inputChan, &session, &localIp)
	for grouper.NextGroup() {
		var macAddress []byte
		for grouper.NextRecord() {
			record := grouper.Read()
			switch record.DatabaseIndex {
			case 0:
				lex.DecodeOrDie(record.Value, &macAddress)
			case 1:
				if macAddress != nil {
					var (
						remoteIp       []byte
						sequenceNumber int32
						timestamp      int64
						flowId         int32
					)
					lex.DecodeOrDie(record.Key, &sequenceNumber, &remoteIp, &timestamp, &flowId)
					outputChan <- &store.Record{
						Key: lex.EncodeOrDie(&session, macAddress, remoteIp, timestamp, int64(math.MaxInt64), sequenceNumber, flowId),
					}
				}
			}
		}
	}
}
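// joinDomainsWithWhitelist matches looked-up domains (database index 1)
// against a per-session whitelist of domains (database index 0). A lookup
// matches a whitelisted domain when that domain is a suffix of the lookup
// beginning at a label boundary ('.'); candidates are found by binary
// search over the sorted whitelist.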
func joinDomainsWithWhitelist(inputChan, outputChan chan *store.Record) {
	var session SessionKey
	grouper := transformer.GroupRecords(inputChan, &session)
	for grouper.NextGroup() {
		var whitelist []string
		for grouper.NextRecord() {
			record := grouper.Read()

			switch record.DatabaseIndex {
			case 0:
				lex.DecodeOrDie(record.Value, &whitelist)
				sort.Strings(whitelist)
			case 1:
				if whitelist == nil {
					continue
				}
				var domain string
				remainder := lex.DecodeOrDie(record.Key, &domain)
				for i := 0; i < len(domain); i++ {
					if i > 0 && domain[i-1] != '.' {
						continue
					}
					idx := sort.SearchStrings(whitelist, domain[i:])
					if idx >= len(whitelist) || whitelist[idx] != domain[i:] {
						continue
					}
					outputChan <- &store.Record{
						Key: lex.Concatenate(grouper.CurrentGroupPrefix, remainder, lex.EncodeOrDie(whitelist[idx])),
					}
				}
			}
		}
	}
}
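// summarizeFilesystemUsage emits, for each (filesystem, timestamp) group,
// the maximum usage observed for each node.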
func summarizeFilesystemUsage(inputChan, outputChan chan *store.Record) {
	var filesystem string
	var timestamp int64
	grouper := transformer.GroupRecords(inputChan, &filesystem, &timestamp)
	for grouper.NextGroup() {
		usage := make(map[string]int64)
		for grouper.NextRecord() {
			record := grouper.Read()
			var node string
			lex.DecodeOrDie(record.Key, &node)
			var used int64
			lex.DecodeOrDie(record.Value, &used)

			if used > usage[node] {
				usage[node] = used
			}
		}
		for node, used := range usage {
			outputChan <- &store.Record{
				Key:   lex.EncodeOrDie(filesystem, timestamp, node),
				Value: lex.EncodeOrDie(used),
			}
		}
	}
}
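// joinAddressIdsWithMacAddresses re-keys records from database index 1
// with the MAC address recorded in database index 0 for the same
// (session, address ID) group, replacing the address ID and sequence
// number in the key with the MAC address.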
func joinAddressIdsWithMacAddresses(inputChan, outputChan chan *store.Record) {
	var (
		session   SessionKey
		addressId int32
	)
	grouper := transformer.GroupRecords(inputChan, &session, &addressId)
	for grouper.NextGroup() {
		var macAddress []byte
		for grouper.NextRecord() {
			record := grouper.Read()
			switch record.DatabaseIndex {
			case 0:
				macAddress = record.Value
			case 1:
				if macAddress != nil {
					var unusedSequenceNumber int32
					remainder := lex.DecodeOrDie(record.Key, &unusedSequenceNumber)
					outputChan <- &store.Record{
						Key: lex.Concatenate(lex.EncodeOrDie(&session), macAddress, remainder),
					}
				}
			}
		}
	}
}
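// flattenLookupsToNodeMacAndTimestamp aggregates DNS lookup counts for
// each (node, MAC, domain) group into hourly buckets, reconstructing each
// record's timestamp as the session ID plus 30 seconds per sequence
// number.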
func flattenLookupsToNodeMacAndTimestamp(inputChan, outputChan chan *store.Record) {
	var nodeId, macAddress, domain string
	grouper := transformer.GroupRecords(inputChan, &nodeId, &macAddress, &domain)
	for grouper.NextGroup() {
		totalCounts := make(map[int64]int64)
		for grouper.NextRecord() {
			record := grouper.Read()
			var (
				anonymizationContext string
				sessionId            int64
				sequenceNumber       int32
			)
			lex.DecodeOrDie(record.Key, &anonymizationContext, &sessionId, &sequenceNumber)
			var count int64
			lex.DecodeOrDie(record.Value, &count)
			timestamp := truncateTimestampToHour(sessionId + convertSecondsToMicroseconds(30)*int64(sequenceNumber))
			totalCounts[timestamp] += count
		}
		for timestamp, totalCount := range totalCounts {
			outputChan <- &store.Record{
				Key:   lex.EncodeOrDie(nodeId, macAddress, domain, timestamp),
				Value: lex.EncodeOrDie(totalCount),
			}
		}
	}
}
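// joinMacWithLookups joins DNS lookup records (database index 1) with the
// MAC address (database index 0) of the same (session, address ID) group,
// re-keying them as (node, MAC, domain, anonymization context, session ID,
// sequence number) and preserving the lookup's value.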
func joinMacWithLookups(inputChan, outputChan chan *store.Record) {
	var (
		session   SessionKey
		addressId int32
	)
	grouper := transformer.GroupRecords(inputChan, &session, &addressId)
	for grouper.NextGroup() {
		var macAddress []byte
		for grouper.NextRecord() {
			record := grouper.Read()
			switch record.DatabaseIndex {
			case 0:
				lex.DecodeOrDie(record.Value, &macAddress)
			case 1:
				if macAddress != nil {
					var (
						sequenceNumber int32
						domain         string
					)
					lex.DecodeOrDie(record.Key, &sequenceNumber, &domain)
					outputChan <- &store.Record{
						Key:   lex.EncodeOrDie(session.NodeId, macAddress, domain, session.AnonymizationContext, session.SessionId, sequenceNumber),
						Value: record.Value,
					}
				}
			}
		}
	}
}
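// aggregateStatisticsReducer sums the AggregateStatistics protocol buffers
// of all records belonging to each node and emits one total per node.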
func aggregateStatisticsReducer(inputChan, outputChan chan *store.Record) {
	var nodeId []byte
	grouper := transformer.GroupRecords(inputChan, &nodeId)
	for grouper.NextGroup() {
		aggregateStatistics := newAggregateStatistics()
		for grouper.NextRecord() {
			record := grouper.Read()
			var statistics AggregateStatistics
			if err := proto.Unmarshal(record.Value, &statistics); err != nil {
				panic(err)
			}
			*aggregateStatistics.Traces += *statistics.Traces
			*aggregateStatistics.Packets += *statistics.Packets
			*aggregateStatistics.PacketSeriesDropped += *statistics.PacketSeriesDropped
			*aggregateStatistics.PcapDropped += *statistics.PcapDropped
			*aggregateStatistics.InterfaceDropped += *statistics.InterfaceDropped
			*aggregateStatistics.Flows += *statistics.Flows
			*aggregateStatistics.DroppedFlows += *statistics.DroppedFlows
			*aggregateStatistics.Bytes += *statistics.Bytes
		}
		encodedStatistics, err := proto.Marshal(aggregateStatistics)
		if err != nil {
			panic(err)
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(nodeId),
			Value: encodedStatistics,
		}
	}
}
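// inferReboots infers reboot times from each node's uptime series. While
// uptimes increase, the node is assumed to stay within one boot session,
// whose boot time is estimated as the maximum of (timestamp - uptime);
// when the uptime decreases, the completed session's estimate is emitted
// and a new session begins.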
func inferReboots(inputChan, outputChan chan *store.Record) {
	var node string
	grouper := transformer.GroupRecords(inputChan, &node)
	for grouper.NextGroup() {
		lastUptime := int64(-1)
		maxReboot := int64(-1)
		for grouper.NextRecord() {
			record := grouper.Read()
			var timestamp int64
			lex.DecodeOrDie(record.Key, &timestamp)
			var uptime int64
			lex.DecodeOrDie(record.Value, &uptime)

			if lastUptime >= 0 && lastUptime > uptime {
				if maxReboot > -1 {
					outputChan <- &store.Record{
						Key: lex.EncodeOrDie(node, maxReboot),
					}
				}
				maxReboot = int64(-1)
			}
			reboot := timestamp - uptime
			if maxReboot < reboot {
				maxReboot = reboot
			}
			lastUptime = uptime
		}

		if maxReboot > -1 {
			outputChan <- &store.Record{
				Key: lex.EncodeOrDie(node, maxReboot),
			}
		}
	}
}
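// joinMacAndSizes joins per-flow packet series (database index 1) with the
// flow's MAC addresses (database index 0). For every MAC address and every
// (timestamp, size) pair it emits a record keyed by (session, MAC,
// timestamp, flow ID, sequence number) whose value is the packet size.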
func joinMacAndSizes(inputChan, outputChan chan *store.Record) {
	var session SessionKey
	var flowId int32
	grouper := transformer.GroupRecords(inputChan, &session, &flowId)
	for grouper.NextGroup() {
		var currentMacAddresses [][]byte
		for grouper.NextRecord() {
			record := grouper.Read()
			if record.DatabaseIndex == 0 {
				lex.DecodeOrDie(record.Value, &currentMacAddresses)
				continue
			}
			if currentMacAddresses == nil {
				continue
			}

			var sequenceNumber int32
			lex.DecodeOrDie(record.Key, &sequenceNumber)
			var timestamps, sizes []int64
			lex.DecodeOrDie(record.Value, &timestamps, &sizes)
			if len(timestamps) != len(sizes) {
				panic(fmt.Errorf("timestamps and sizes must have the same length"))
			}

			for _, currentMacAddress := range currentMacAddresses {
				for idx, timestamp := range timestamps {
					outputChan <- &store.Record{
						Key:   lex.EncodeOrDie(&session, currentMacAddress, timestamp, flowId, sequenceNumber),
						Value: lex.EncodeOrDie(sizes[idx]),
					}
				}
			}
		}
	}
}
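// summarizeSizesByTimestamp emits, for each (experiment, node, timestamp)
// group, the number of sizes observed followed by the percentiles listed
// in quantilesToKeep, drawn from 101 evenly spaced quantiles.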
func summarizeSizesByTimestamp(inputChan, outputChan chan *store.Record) {
	quantileComputer := NewQuantileSample(101)
	quantilesToKeep := []int{0, 1, 5, 10, 25, 50, 75, 90, 95, 99, 100}

	var experiment, node string
	var timestamp int64
	grouper := transformer.GroupRecords(inputChan, &experiment, &node, &timestamp)
	for grouper.NextGroup() {
		for grouper.NextRecord() {
			record := grouper.Read()
			var statsValue StatsValue
			lex.DecodeOrDie(record.Value, &statsValue)
			quantileComputer.Append(statsValue.Size)
		}

		count := int64(quantileComputer.Count())
		quantiles := quantileComputer.Quantiles()
		quantileComputer.Reset()

		statistics := []interface{}{count}
		// Keep only the percentiles listed in quantilesToKeep (indices into
		// the 101 evenly spaced quantiles).
		for _, quantileIndex := range quantilesToKeep {
			statistics = append(statistics, quantiles[quantileIndex])
		}

		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(experiment, node, timestamp),
			Value: lex.EncodeOrDie(statistics...),
		}
	}
}
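// summarizeInterarrivalTimes emits, for each (experiment, node) group, the
// number of interarrival times observed followed by 21 evenly spaced
// quantiles of those times.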
func summarizeInterarrivalTimes(inputChan, outputChan chan *store.Record) {
	quantileComputer := NewQuantileSample(21)

	var experiment, node string
	grouper := transformer.GroupRecords(inputChan, &experiment, &node)
	for grouper.NextGroup() {
		for grouper.NextRecord() {
			record := grouper.Read()
			var interarrivalTime int64
			lex.DecodeOrDie(record.Key, &interarrivalTime)
			quantileComputer.Append(interarrivalTime)
		}

		count := int64(quantileComputer.Count())
		quantiles := quantileComputer.Quantiles()
		statistics := []interface{}{count}
		for _, q := range quantiles {
			statistics = append(statistics, q)
		}
		quantileComputer.Reset()

		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(experiment, node),
			Value: lex.EncodeOrDie(statistics...),
		}
	}
}
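// joinMacAndFlowId joins lists of flow IDs (database index 1) with the MAC
// address (database index 0) of the same (session, IP address) group,
// emitting one key-only record per flow ID of (session, flow ID, sequence
// number) with the raw MAC address bytes appended.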
func joinMacAndFlowId(inputChan, outputChan chan *store.Record) {
	var session SessionKey
	var ipAddress []byte
	grouper := transformer.GroupRecords(inputChan, &session, &ipAddress)
	for grouper.NextGroup() {
		var currentMacAddress []byte
		for grouper.NextRecord() {
			record := grouper.Read()
			if record.DatabaseIndex == 0 {
				currentMacAddress = record.Value
				continue
			}
			if currentMacAddress == nil {
				continue
			}
			var sequenceNumber int32
			lex.DecodeOrDie(record.Key, &sequenceNumber)
			var flowIds []int32
			lex.DecodeOrDie(record.Value, &flowIds)
			for _, flowId := range flowIds {
				outputChan <- &store.Record{
					Key: lex.Concatenate(lex.EncodeOrDie(&session, flowId, sequenceNumber), currentMacAddress),
				}
			}
		}
	}
}
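// joinDomainsWithSizes joins size records (database index 1) with the
// domains and MAC addresses attributed to the flow (database index 0),
// emitting one record per domain keyed by (node, domain, timestamp, MAC,
// anonymization context, session ID, flow ID, sequence number).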
func joinDomainsWithSizes(inputChan, outputChan chan *store.Record) {
	var (
		session SessionKey
		flowId  int32
	)
	grouper := transformer.GroupRecords(inputChan, &session, &flowId)
	for grouper.NextGroup() {
		var domains, macAddresses [][]byte
		for grouper.NextRecord() {
			record := grouper.Read()

			switch record.DatabaseIndex {
			case 0:
				lex.DecodeOrDie(record.Value, &domains, &macAddresses)
			case 1:
				if domains != nil && macAddresses != nil {
					var (
						sequenceNumber int32
						timestamp      int64
					)
					lex.DecodeOrDie(record.Key, &sequenceNumber, &timestamp)
					for idx, domain := range domains {
						outputChan <- &store.Record{
							Key:   lex.EncodeOrDie(session.NodeId, domain, timestamp, macAddresses[idx], session.AnonymizationContext, session.SessionId, flowId, sequenceNumber),
							Value: record.Value,
						}
					}
				}
			}
		}
	}
}
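// aggregateStatisticsReduceBySession sums AggregateStatistics for each
// session. PcapDropped and InterfaceDropped appear to be 32-bit counters
// that can wrap, so instead of summing them directly, a baseline of
// math.MaxUint32 is added whenever a decrease is observed.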
func aggregateStatisticsReduceBySession(inputChan, outputChan chan *store.Record) {
	var session SessionKey
	grouper := transformer.GroupRecords(inputChan, &session)
	for grouper.NextGroup() {
		aggregateStatistics := newAggregateStatistics()
		var pcapDropped, interfaceDropped int64
		var lastPcapDropped, lastInterfaceDropped int64
		var pcapDroppedBaseline, interfaceDroppedBaseline int64
		for grouper.NextRecord() {
			record := grouper.Read()
			var statistics AggregateStatistics
			if err := proto.Unmarshal(record.Value, &statistics); err != nil {
				panic(err)
			}

			if lastPcapDropped > *statistics.PcapDropped {
				pcapDroppedBaseline += math.MaxUint32
				pcapDropped = 0
			}
			lastPcapDropped = *statistics.PcapDropped
			pcapDropped = maxInt64(pcapDropped, *statistics.PcapDropped)
			if lastInterfaceDropped > *statistics.InterfaceDropped {
				interfaceDroppedBaseline += math.MaxUint32
				interfaceDropped = 0
			}
			lastInterfaceDropped = *statistics.InterfaceDropped
			interfaceDropped = maxInt64(interfaceDropped, *statistics.InterfaceDropped)

			*aggregateStatistics.Traces += *statistics.Traces
			*aggregateStatistics.Packets += *statistics.Packets
			*aggregateStatistics.PacketSeriesDropped += *statistics.PacketSeriesDropped
			*aggregateStatistics.Flows += *statistics.Flows
			*aggregateStatistics.DroppedFlows += *statistics.DroppedFlows
			*aggregateStatistics.Bytes += *statistics.Bytes
		}

		*aggregateStatistics.PcapDropped = pcapDroppedBaseline + pcapDropped
		*aggregateStatistics.InterfaceDropped = interfaceDroppedBaseline + interfaceDropped

		encodedStatistics, err := proto.Marshal(aggregateStatistics)
		if err != nil {
			panic(err)
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(&session),
			Value: encodedStatistics,
		}
	}
}
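// flattenLookupsToNodeAndMac sums lookup counts into a single total for
// each (node, MAC, domain) group.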
func flattenLookupsToNodeAndMac(inputChan, outputChan chan *store.Record) {
	var nodeId, macAddress, domain string
	grouper := transformer.GroupRecords(inputChan, &nodeId, &macAddress, &domain)
	for grouper.NextGroup() {
		var totalCount int64
		for grouper.NextRecord() {
			record := grouper.Read()
			var count int64
			lex.DecodeOrDie(record.Value, &count)
			totalCount += count
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(nodeId, macAddress, domain),
			Value: lex.EncodeOrDie(totalCount),
		}
	}
}
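// reduceBytesPerDevice sums byte counts for each (node, MAC, timestamp)
// group.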
func reduceBytesPerDevice(inputChan, outputChan chan *store.Record) {
	var nodeId, macAddress []byte
	var timestamp int64
	grouper := transformer.GroupRecords(inputChan, &nodeId, &macAddress, &timestamp)
	for grouper.NextGroup() {
		var totalSize int64
		for grouper.NextRecord() {
			record := grouper.Read()
			var size int64
			lex.DecodeOrDie(record.Value, &size)
			totalSize += size
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(nodeId, macAddress, timestamp),
			Value: lex.EncodeOrDie(totalSize),
		}
	}
}
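// flattenMacAddresses collects the MAC addresses of each (session, flow
// ID, sequence number) group into a single list-valued record.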
func flattenMacAddresses(inputChan, outputChan chan *store.Record) {
	var session SessionKey
	var flowId, sequenceNumber int32
	grouper := transformer.GroupRecords(inputChan, &session, &flowId, &sequenceNumber)
	for grouper.NextGroup() {
		macAddresses := [][]byte{}
		for grouper.NextRecord() {
			record := grouper.Read()
			var macAddress []byte
			lex.DecodeOrDie(record.Key, &macAddress)
			macAddresses = append(macAddresses, macAddress)
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(&session, flowId, sequenceNumber),
			Value: lex.EncodeOrDie(macAddresses),
		}
	}
}
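// reduceBytesPerDeviceSession sums byte counts for each (session, MAC,
// timestamp) group, re-keying the total as (node, MAC, timestamp,
// anonymization context, session ID).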
func reduceBytesPerDeviceSession(inputChan, outputChan chan *store.Record) {
	var session SessionKey
	var macAddress []byte
	var timestamp int64
	grouper := transformer.GroupRecords(inputChan, &session, &macAddress, &timestamp)
	for grouper.NextGroup() {
		var totalSize int64
		for grouper.NextRecord() {
			record := grouper.Read()
			var size int64
			lex.DecodeOrDie(record.Value, &size)
			totalSize += size
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(session.NodeId, macAddress, timestamp, session.AnonymizationContext, session.SessionId),
			Value: lex.EncodeOrDie(totalSize),
		}
	}
}
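// flattenIntoBytesPerTimestamp sums byte counts for each (node, domain,
// timestamp) group.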
func flattenIntoBytesPerTimestamp(inputChan, outputChan chan *store.Record) {
	var (
		nodeId, domain []byte
		timestamp      int64
	)
	grouper := transformer.GroupRecords(inputChan, &nodeId, &domain, &timestamp)
	for grouper.NextGroup() {
		var totalSize int64
		for grouper.NextRecord() {
			record := grouper.Read()
			var size int64
			lex.DecodeOrDie(record.Value, &size)
			totalSize += size
		}
		outputChan <- &store.Record{
			Key:   lex.EncodeOrDie(nodeId, domain, timestamp),
			Value: lex.EncodeOrDie(totalSize),
		}
	}
}
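// summarizeSizesPerDay sums sizes per calendar day for each (experiment,
// node) group, bucketing records by their received timestamps truncated to
// the day.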
func summarizeSizesPerDay(inputChan, outputChan chan *store.Record) {
	var experiment, node string
	grouper := transformer.GroupRecords(inputChan, &experiment, &node)
	for grouper.NextGroup() {
		sizePerDay := make(map[int64]int64)
		for grouper.NextRecord() {
			record := grouper.Read()
			var statsValue StatsValue
			lex.DecodeOrDie(record.Value, &statsValue)
			roundedTimestamp := truncateTimestampToDay(statsValue.ReceivedTimestamp)
			sizePerDay[roundedTimestamp] += statsValue.Size
		}

		for timestamp, size := range sizePerDay {
			outputChan <- &store.Record{
				Key:   lex.EncodeOrDie(experiment, node, timestamp),
				Value: lex.EncodeOrDie(size),
			}
		}
	}
}
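// groupDomainsAndMacAddresses gathers the (domain, MAC address) pairs of
// each (session, flow ID, sequence number) group into two parallel slices
// stored as the group's value.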
func groupDomainsAndMacAddresses(inputChan, outputChan chan *store.Record) {
	var (
		session                SessionKey
		flowId, sequenceNumber int32
	)
	grouper := transformer.GroupRecords(inputChan, &session, &flowId, &sequenceNumber)
	for grouper.NextGroup() {
		var domains, macAddresses [][]byte
		for grouper.NextRecord() {
			record := grouper.Read()
			var domain, macAddress []byte
			lex.DecodeOrDie(record.Key, &domain, &macAddress)
			domains = append(domains, domain)
			macAddresses = append(macAddresses, macAddress)
		}
		outputChan <- &store.Record{
			Key:   grouper.CurrentGroupPrefix,
			Value: lex.EncodeOrDie(domains, macAddresses),
		}
	}
}
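// joinWhitelistedDomainsWithFlows joins domain validity intervals
// (database index 0) with flow timestamps (database index 1) for each
// (session, MAC, remote IP) group, emitting a record whenever a flow's
// timestamp falls within a domain's [start, end] interval.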
func joinWhitelistedDomainsWithFlows(inputChan, outputChan chan *store.Record) {
	var (
		session              SessionKey
		macAddress, remoteIp []byte
	)
	grouper := transformer.GroupRecords(inputChan, &session, &macAddress, &remoteIp)
	for grouper.NextGroup() {
		type timestampsAndDomain struct {
			start, end int64
			domain     []byte
		}
		var domains []*timestampsAndDomain
		for grouper.NextRecord() {
			record := grouper.Read()

			switch record.DatabaseIndex {
			case 0:
				var newEntry timestampsAndDomain
				lex.DecodeOrDie(record.Key, &newEntry.start, &newEntry.end, &newEntry.domain)
				domains = append(domains, &newEntry)
			case 1:
				if domains != nil {
					var (
						timestamp, unusedInfinity int64
						sequenceNumber, flowId    int32
					)
					lex.DecodeOrDie(record.Key, &timestamp, &unusedInfinity, &sequenceNumber, &flowId)
					for _, entry := range domains {
						if entry.start <= timestamp && entry.end >= timestamp {
							outputChan <- &store.Record{
								Key: lex.EncodeOrDie(&session, flowId, sequenceNumber, entry.domain, macAddress),
							}
						}
					}
				}
			}
		}
	}
}
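// computeInterarrivalTimes emits the time elapsed between consecutive
// records of each (experiment, node) group, keyed by (experiment, node,
// interarrival time, filename).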
func computeInterarrivalTimes(inputChan, outputChan chan *store.Record) {
	var experiment, node string
	grouper := transformer.GroupRecords(inputChan, &experiment, &node)
	for grouper.NextGroup() {
		var lastTimestamp int64
		for grouper.NextRecord() {
			record := grouper.Read()

			var timestamp int64
			var filename string
			lex.DecodeOrDie(record.Key, &timestamp, &filename)

			if lastTimestamp > 0 {
				interarrivalTime := timestamp - lastTimestamp
				outputChan <- &store.Record{
					Key: lex.EncodeOrDie(experiment, node, interarrivalTime, filename),
				}
			}

			lastTimestamp = timestamp
		}
	}
}
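// detectChangedPackageVersions emits a record whenever a package's version
// on a node differs from the version in the previous record.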
func detectChangedPackageVersions(inputChan, outputChan chan *store.Record) {
	var node, packageName string
	grouper := transformer.GroupRecords(inputChan, &node, &packageName)
	for grouper.NextGroup() {
		var lastVersion string
		for grouper.NextRecord() {
			record := grouper.Read()
			var timestamp int64
			lex.DecodeOrDie(record.Key, &timestamp)
			var version string
			lex.DecodeOrDie(record.Value, &version)

			if version != lastVersion {
				outputChan <- &store.Record{
					Key:   lex.EncodeOrDie(node, packageName, timestamp),
					Value: lex.EncodeOrDie(version),
				}
			}

			lastVersion = version
		}
	}
}
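// joinARecordsWithCnameRecords computes the intervals during which an A
// record (database index 0) and a CNAME record (database index 1) for the
// same (session, MAC, domain) were simultaneously valid. Each record's
// validity interval becomes a start and an end event; the events are
// sorted and swept while tracking the currently valid A values (IP
// addresses) and CNAME values. When the last overlapping instance of a
// value expires, one record is emitted per currently valid value of the
// other type, keyed by the overlap interval.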
func joinARecordsWithCnameRecords(inputChan, outputChan chan *store.Record) {
	var (
		session            SessionKey
		macAddress, domain []byte
		anonymized         bool
	)
	grouper := transformer.GroupRecords(inputChan, &session, &macAddress, &domain, &anonymized)
	for grouper.NextGroup() {
		var allRecords dnsRecords
		for grouper.NextRecord() {
			record := grouper.Read()
			newStartDnsRecord := dnsRecord{startEvent: true, aRecord: record.DatabaseIndex == 0}
			newEndDnsRecord := dnsRecord{startEvent: false, aRecord: record.DatabaseIndex == 0}
			lex.DecodeOrDie(record.Key, &newStartDnsRecord.timestamp, &newEndDnsRecord.timestamp, &newStartDnsRecord.value)
			newEndDnsRecord.value = newStartDnsRecord.value
			allRecords = append(allRecords, &newStartDnsRecord)
			allRecords = append(allRecords, &newEndDnsRecord)
		}
		sort.Sort(allRecords)

		currentAValues := make(map[string]int64)
		currentCnameValues := make(map[string]int64)
		currentACounts := make(map[string]int)
		currentCnameCounts := make(map[string]int)
		for _, record := range allRecords {
			switch record.aRecord {
			case true:
				switch record.startEvent {
				case true:
					timestamp := record.timestamp
					if oldTimestamp, ok := currentAValues[record.value]; ok {
						timestamp = minInt64(timestamp, oldTimestamp)
					}
					currentAValues[record.value] = timestamp
					currentACounts[record.value]++
				case false:
					currentACounts[record.value]--
					if currentACounts[record.value] == 0 {
						startTimestamp := currentAValues[record.value]
						delete(currentAValues, record.value)
						for domain, timestamp := range currentCnameValues {
							outputChan <- &store.Record{
								Key: lex.EncodeOrDie(&session, domain, macAddress, record.value, maxInt64(startTimestamp, timestamp), record.timestamp),
							}
						}
					}
				}
			case false:
				switch record.startEvent {
				case true:
					timestamp := record.timestamp
					if oldTimestamp, ok := currentCnameValues[record.value]; ok {
						timestamp = minInt64(timestamp, oldTimestamp)
					}
					currentCnameValues[record.value] = timestamp
					currentCnameCounts[record.value]++
				case false:
					currentCnameCounts[record.value]--
					if currentCnameCounts[record.value] == 0 {
						startTimestamp := currentCnameValues[record.value]
						delete(currentCnameValues, record.value)
						for ip, timestamp := range currentAValues {
							outputChan <- &store.Record{
								Key: lex.EncodeOrDie(&session, record.value, macAddress, ip, maxInt64(startTimestamp, timestamp), record.timestamp),
							}
						}
					}
				}
			}
		}
	}
}