func BytesPerDevicePipeline(levelDbManager store.Manager, bytesPerDevicePostgresStore store.Writer) transformer.Pipeline { tracesStore := levelDbManager.Seeker("traces") availabilityIntervalsStore := levelDbManager.Seeker("consistent-ranges") sessionsStore := levelDbManager.ReadingDeleter("bytesperdevice-session") addressTableStore := levelDbManager.SeekingWriter("bytesperdevice-address-table") flowTableStore := levelDbManager.SeekingWriter("bytesperdevice-flow-table") packetsStore := levelDbManager.SeekingWriter("bytesperdevice-packets") flowIdToMacStore := levelDbManager.SeekingWriter("bytesperdevice-flow-id-to-mac") flowIdToMacsStore := levelDbManager.SeekingWriter("bytesperdevice-flow-id-to-macs") bytesPerDeviceUnreducedStore := levelDbManager.SeekingWriter("bytesperdevice-unreduced") bytesPerDeviceSessionStore := levelDbManager.ReadingWriter("bytesperdevice-reduced-sessions") bytesPerDeviceStore := levelDbManager.ReadingWriter("bytesperdevice") traceKeyRangesStore := levelDbManager.ReadingDeleter("bytesperdevice-trace-key-ranges") consolidatedTraceKeyRangesStore := levelDbManager.ReadingDeleter("bytesperdevice-consolidated-trace-key-ranges") newTracesStore := store.NewRangeExcludingReader(store.NewRangeIncludingReader(tracesStore, availabilityIntervalsStore), traceKeyRangesStore) return append([]transformer.PipelineStage{ transformer.PipelineStage{ Name: "BytesPerDeviceMapper", Reader: newTracesStore, Transformer: transformer.MakeMultipleOutputsDoFunc(bytesPerDeviceMapper, 3), Writer: store.NewMuxingWriter(addressTableStore, flowTableStore, packetsStore), }, SessionPipelineStage(newTracesStore, sessionsStore), transformer.PipelineStage{ Name: "JoinMacAndFlowId", Reader: store.NewPrefixIncludingReader(store.NewDemuxingSeeker(addressTableStore, flowTableStore), sessionsStore), Transformer: transformer.TransformFunc(joinMacAndFlowId), Writer: flowIdToMacStore, }, transformer.PipelineStage{ Name: "FlattenMacAddresses", Reader: store.NewPrefixIncludingReader(flowIdToMacStore, sessionsStore), Transformer: transformer.TransformFunc(flattenMacAddresses), Writer: flowIdToMacsStore, }, transformer.PipelineStage{ Name: "JoinMacAndSizes", Reader: store.NewPrefixIncludingReader(store.NewDemuxingSeeker(flowIdToMacsStore, packetsStore), sessionsStore), Transformer: transformer.TransformFunc(joinMacAndSizes), Writer: bytesPerDeviceUnreducedStore, }, transformer.PipelineStage{ Name: "ReduceBytesPerDeviceSession", Reader: store.NewPrefixIncludingReader(bytesPerDeviceUnreducedStore, sessionsStore), Transformer: transformer.TransformFunc(reduceBytesPerDeviceSession), Writer: bytesPerDeviceSessionStore, }, transformer.PipelineStage{ Name: "ReduceBytesPerDevice", Reader: bytesPerDeviceSessionStore, Transformer: transformer.TransformFunc(reduceBytesPerDevice), Writer: bytesPerDeviceStore, }, transformer.PipelineStage{ Name: "BytesPerDevicePostgres", Reader: bytesPerDeviceStore, Writer: bytesPerDevicePostgresStore, }, }, TraceKeyRangesPipeline(newTracesStore, traceKeyRangesStore, consolidatedTraceKeyRangesStore)...) }
func IndexTarballsPipeline(tarballsPath string, levelDbManager store.Manager) transformer.Pipeline { allTarballsPattern := filepath.Join(tarballsPath, "all", "health", "*", "*", "health_*.tar.gz") dailyTarballsPattern := filepath.Join(tarballsPath, "by-date", "*", "health", "*", "health_*.tar.gz") tarnamesStore := levelDbManager.ReadingWriter("tarnames") tarnamesIndexedStore := levelDbManager.ReadingWriter("tarnames-indexed") logsStore := levelDbManager.Writer("logs") return []transformer.PipelineStage{ transformer.PipelineStage{ Name: "ScanLogTarballs", Reader: store.NewGlobReader(allTarballsPattern), Writer: tarnamesStore, }, transformer.PipelineStage{ Name: "ScanDailyLogTarballs", Reader: store.NewGlobReader(dailyTarballsPattern), Writer: tarnamesStore, }, transformer.PipelineStage{ Name: "ReadLogTarballs", Reader: store.NewDemuxingReader(tarnamesStore, tarnamesIndexedStore), Transformer: transformer.MakeMultipleOutputsGroupDoFunc(IndexTarballs, 2), Writer: store.NewMuxingWriter(logsStore, tarnamesIndexedStore), }, } }
func IndexTarballsPipeline(tarballsPath string, levelDbManager store.Manager) transformer.Pipeline { tarballsPattern := filepath.Join(tarballsPath, "*", "*", "*.tar.gz") tarnamesStore := levelDbManager.ReadingWriter("tarnames") tarnamesIndexedStore := levelDbManager.ReadingWriter("tarnames-indexed") tracesStore := levelDbManager.Writer("traces") return []transformer.PipelineStage{ transformer.PipelineStage{ Name: "ScanTraceTarballs", Reader: store.NewGlobReader(tarballsPattern), Writer: tarnamesStore, }, transformer.PipelineStage{ Name: "IndexTraces", Transformer: transformer.MakeMultipleOutputsGroupDoFunc(IndexTarballs, 2), Reader: store.NewDemuxingReader(tarnamesStore, tarnamesIndexedStore), Writer: store.NewMuxingWriter(tracesStore, tarnamesIndexedStore), }, } }
func BytesPerDomainPipeline(levelDbManager store.Manager, bytesPerDomainPostgresStore store.Writer) transformer.Pipeline { tracesStore := levelDbManager.Seeker("traces") availabilityIntervalsStore := levelDbManager.Seeker("consistent-ranges") traceKeyRangesStore := levelDbManager.ReadingDeleter("bytesperdomain-trace-key-ranges") consolidatedTraceKeyRangesStore := levelDbManager.ReadingDeleter("bytesperdomain-consolidated-trace-key-ranges") addressIdTableStore := levelDbManager.SeekingWriter("bytesperdomain-address-id-table") aRecordTableStore := levelDbManager.SeekingWriter("bytesperdomain-a-record-table") cnameRecordTableStore := levelDbManager.SeekingWriter("bytesperdomain-cname-record-table") flowIpsTableStore := levelDbManager.SeekingWriter("bytesperdomain-flow-ips-table") addressIpTableStore := levelDbManager.SeekingWriter("bytesperdomain-address-ip-table") bytesPerTimestampShardedStore := levelDbManager.SeekingWriter("bytesperdomain-bytes-per-timestamp-sharded") whitelistStore := levelDbManager.SeekingWriter("bytesperdomain-whitelist") aRecordsWithMacStore := levelDbManager.SeekingWriter("bytesperdomain-a-records-with-mac") cnameRecordsWithMacStore := levelDbManager.SeekingWriter("bytesperdomain-cname-records-with-mac") allDnsMappingsStore := levelDbManager.SeekingWriter("bytesperdomain-all-dns-mappings") allWhitelistedMappingsStore := levelDbManager.SeekingWriter("bytesperdomain-all-whitelisted-mappings") flowMacsTableStore := levelDbManager.SeekingWriter("bytesperdomain-flow-macs-table") flowDomainsTableStore := levelDbManager.SeekingWriter("bytesperdomain-flow-domains-table") flowDomainsGroupedTableStore := levelDbManager.SeekingWriter("bytesperdomain-flow-domains-grouped-table") bytesPerDomainShardedStore := levelDbManager.ReadingWriter("bytesperdomain-bytes-per-domain-sharded") bytesPerDomainPerDeviceStore := levelDbManager.ReadingWriter("bytesperdomain-bytes-per-domain-per-device") bytesPerDomainStore := levelDbManager.ReadingWriter("bytesperdomain-bytes-per-domain") sessionsStore := levelDbManager.ReadingDeleter("bytesperdomain-sessions") excludeOldSessions := func(stor store.Seeker) store.Seeker { return store.NewPrefixIncludingReader(stor, sessionsStore) } newTracesStore := store.NewRangeExcludingReader(store.NewRangeIncludingReader(tracesStore, availabilityIntervalsStore), traceKeyRangesStore) return append([]transformer.PipelineStage{ transformer.PipelineStage{ Name: "BytesPerDomainMapper", Reader: newTracesStore, Transformer: transformer.MakeMultipleOutputsDoFunc(bytesPerDomainMapper, 7), Writer: store.NewMuxingWriter(addressIdTableStore, aRecordTableStore, cnameRecordTableStore, flowIpsTableStore, addressIpTableStore, bytesPerTimestampShardedStore, whitelistStore), }, SessionPipelineStage(newTracesStore, sessionsStore), transformer.PipelineStage{ Name: "JoinAAddressIdsWithMacAddresses", Reader: excludeOldSessions(store.NewDemuxingSeeker(addressIdTableStore, aRecordTableStore)), Transformer: transformer.TransformFunc(joinAddressIdsWithMacAddresses), Writer: aRecordsWithMacStore, }, transformer.PipelineStage{ Name: "JoinCnameAddressIdsWithMacAddresses", Reader: excludeOldSessions(store.NewDemuxingSeeker(addressIdTableStore, cnameRecordTableStore)), Transformer: transformer.TransformFunc(joinAddressIdsWithMacAddresses), Writer: cnameRecordsWithMacStore, }, transformer.PipelineStage{ Name: "JoinARecordsWithCnameRecords", Reader: excludeOldSessions(store.NewDemuxingSeeker(aRecordsWithMacStore, cnameRecordsWithMacStore)), Transformer: transformer.TransformFunc(joinARecordsWithCnameRecords), Writer: allDnsMappingsStore, }, transformer.PipelineStage{ Name: "EmitARecords", Reader: excludeOldSessions(aRecordsWithMacStore), Transformer: transformer.MakeDoFunc(emitARecords), Writer: allDnsMappingsStore, }, transformer.PipelineStage{ Name: "JoinDomainsWithWhitelist", Reader: excludeOldSessions(store.NewDemuxingSeeker(whitelistStore, allDnsMappingsStore)), Transformer: transformer.TransformFunc(joinDomainsWithWhitelist), Writer: allWhitelistedMappingsStore, }, transformer.PipelineStage{ Name: "JoinMacWithFlowId", Reader: excludeOldSessions(store.NewDemuxingSeeker(addressIpTableStore, flowIpsTableStore)), Transformer: transformer.TransformFunc(joinMacWithFlowId), Writer: flowMacsTableStore, }, transformer.PipelineStage{ Name: "JoinWhitelistedDomainsWithFlows", Reader: excludeOldSessions(store.NewDemuxingSeeker(allWhitelistedMappingsStore, flowMacsTableStore)), Transformer: transformer.TransformFunc(joinWhitelistedDomainsWithFlows), Writer: flowDomainsTableStore, }, transformer.PipelineStage{ Name: "GroupDomainsAndMacAddresses", Reader: excludeOldSessions(flowDomainsTableStore), Transformer: transformer.TransformFunc(groupDomainsAndMacAddresses), Writer: flowDomainsGroupedTableStore, }, transformer.PipelineStage{ Name: "JoinDomainsWithSizes", Reader: excludeOldSessions(store.NewDemuxingSeeker(flowDomainsGroupedTableStore, bytesPerTimestampShardedStore)), Transformer: transformer.TransformFunc(joinDomainsWithSizes), Writer: bytesPerDomainShardedStore, }, transformer.PipelineStage{ Name: "FlattenIntoBytesPerDevice", Reader: bytesPerDomainShardedStore, Transformer: transformer.TransformFunc(flattenIntoBytesPerDevice), Writer: bytesPerDomainPerDeviceStore, }, transformer.PipelineStage{ Name: "FlattenIntoBytesPerTimestamp", Reader: bytesPerDomainShardedStore, Transformer: transformer.TransformFunc(flattenIntoBytesPerTimestamp), Writer: bytesPerDomainStore, }, transformer.PipelineStage{ Name: "BytesPerDomainPostgresStore", Reader: bytesPerDomainStore, Writer: bytesPerDomainPostgresStore, }, }, TraceKeyRangesPipeline(newTracesStore, traceKeyRangesStore, consolidatedTraceKeyRangesStore)...) }