// BytesPerDevicePipeline builds the pipeline that computes bytes transferred
// per device: it maps newly available traces into address, flow, and packet
// tables, joins MAC addresses with flow IDs and packet sizes, reduces the
// totals per session and per device, and exports the result to Postgres.
func BytesPerDevicePipeline(levelDbManager store.Manager, bytesPerDevicePostgresStore store.Writer) transformer.Pipeline {
	tracesStore := levelDbManager.Seeker("traces")
	availabilityIntervalsStore := levelDbManager.Seeker("consistent-ranges")
	sessionsStore := levelDbManager.ReadingDeleter("bytesperdevice-session")
	addressTableStore := levelDbManager.SeekingWriter("bytesperdevice-address-table")
	flowTableStore := levelDbManager.SeekingWriter("bytesperdevice-flow-table")
	packetsStore := levelDbManager.SeekingWriter("bytesperdevice-packets")
	flowIdToMacStore := levelDbManager.SeekingWriter("bytesperdevice-flow-id-to-mac")
	flowIdToMacsStore := levelDbManager.SeekingWriter("bytesperdevice-flow-id-to-macs")
	bytesPerDeviceUnreducedStore := levelDbManager.SeekingWriter("bytesperdevice-unreduced")
	bytesPerDeviceSessionStore := levelDbManager.ReadingWriter("bytesperdevice-reduced-sessions")
	bytesPerDeviceStore := levelDbManager.ReadingWriter("bytesperdevice")
	traceKeyRangesStore := levelDbManager.ReadingDeleter("bytesperdevice-trace-key-ranges")
	consolidatedTraceKeyRangesStore := levelDbManager.ReadingDeleter("bytesperdevice-consolidated-trace-key-ranges")
	// Read only traces that fall inside the consistent availability intervals
	// and are not already covered by a processed trace key range.
	newTracesStore := store.NewRangeExcludingReader(store.NewRangeIncludingReader(tracesStore, availabilityIntervalsStore), traceKeyRangesStore)
	return append([]transformer.PipelineStage{
		transformer.PipelineStage{
			Name:        "BytesPerDeviceMapper",
			Reader:      newTracesStore,
			Transformer: transformer.MakeMultipleOutputsDoFunc(bytesPerDeviceMapper, 3),
			Writer:      store.NewMuxingWriter(addressTableStore, flowTableStore, packetsStore),
		},
		SessionPipelineStage(newTracesStore, sessionsStore),
		transformer.PipelineStage{
			Name:        "JoinMacAndFlowId",
			Reader:      store.NewPrefixIncludingReader(store.NewDemuxingSeeker(addressTableStore, flowTableStore), sessionsStore),
			Transformer: transformer.TransformFunc(joinMacAndFlowId),
			Writer:      flowIdToMacStore,
		},
		transformer.PipelineStage{
			Name:        "FlattenMacAddresses",
			Reader:      store.NewPrefixIncludingReader(flowIdToMacStore, sessionsStore),
			Transformer: transformer.TransformFunc(flattenMacAddresses),
			Writer:      flowIdToMacsStore,
		},
		transformer.PipelineStage{
			Name:        "JoinMacAndSizes",
			Reader:      store.NewPrefixIncludingReader(store.NewDemuxingSeeker(flowIdToMacsStore, packetsStore), sessionsStore),
			Transformer: transformer.TransformFunc(joinMacAndSizes),
			Writer:      bytesPerDeviceUnreducedStore,
		},
		transformer.PipelineStage{
			Name:        "ReduceBytesPerDeviceSession",
			Reader:      store.NewPrefixIncludingReader(bytesPerDeviceUnreducedStore, sessionsStore),
			Transformer: transformer.TransformFunc(reduceBytesPerDeviceSession),
			Writer:      bytesPerDeviceSessionStore,
		},
		transformer.PipelineStage{
			Name:        "ReduceBytesPerDevice",
			Reader:      bytesPerDeviceSessionStore,
			Transformer: transformer.TransformFunc(reduceBytesPerDevice),
			Writer:      bytesPerDeviceStore,
		},
		transformer.PipelineStage{
			Name:   "BytesPerDevicePostgres",
			Reader: bytesPerDeviceStore,
			Writer: bytesPerDevicePostgresStore,
		},
	}, TraceKeyRangesPipeline(newTracesStore, traceKeyRangesStore, consolidatedTraceKeyRangesStore)...)
}
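Each stage above pairs a Reader, an optional Transformer, and a Writer. A minimal sketch of how such a stage list could be driven in order is shown below; the Record type, the interfaces, the Stage struct, and runStages are hypothetical stand-ins for illustration only, not the transformer package's actual API.

// Record is a hypothetical key/value pair flowing between stages.
type Record struct {
	Key   []byte
	Value []byte
}

// Hypothetical stand-ins for the Reader, Transformer, and Writer roles above.
type Reader interface{ Read() ([]Record, error) }
type Writer interface{ Write([]Record) error }
type Transformer func([]Record) []Record

// Stage mirrors the shape of transformer.PipelineStage for this sketch only.
type Stage struct {
	Name        string
	Reader      Reader
	Transformer Transformer
	Writer      Writer
}

// runStages executes each stage in order: read, transform (if present), write.
func runStages(stages []Stage) error {
	for _, stage := range stages {
		records, err := stage.Reader.Read()
		if err != nil {
			return err
		}
		if stage.Transformer != nil {
			records = stage.Transformer(records)
		}
		if err := stage.Writer.Write(records); err != nil {
			return err
		}
	}
	return nil
}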
// IndexTarballsPipeline scans health log tarballs under tarballsPath (both the
// "all" and "by-date" layouts), records their filenames, and indexes tarballs
// that have not been indexed yet into the logs store.
func IndexTarballsPipeline(tarballsPath string, levelDbManager store.Manager) transformer.Pipeline {
	allTarballsPattern := filepath.Join(tarballsPath, "all", "health", "*", "*", "health_*.tar.gz")
	dailyTarballsPattern := filepath.Join(tarballsPath, "by-date", "*", "health", "*", "health_*.tar.gz")
	tarnamesStore := levelDbManager.ReadingWriter("tarnames")
	tarnamesIndexedStore := levelDbManager.ReadingWriter("tarnames-indexed")
	logsStore := levelDbManager.Writer("logs")
	return []transformer.PipelineStage{
		transformer.PipelineStage{
			Name:   "ScanLogTarballs",
			Reader: store.NewGlobReader(allTarballsPattern),
			Writer: tarnamesStore,
		},
		transformer.PipelineStage{
			Name:   "ScanDailyLogTarballs",
			Reader: store.NewGlobReader(dailyTarballsPattern),
			Writer: tarnamesStore,
		},
		transformer.PipelineStage{
			Name:        "ReadLogTarballs",
			Reader:      store.NewDemuxingReader(tarnamesStore, tarnamesIndexedStore),
			Transformer: transformer.MakeMultipleOutputsGroupDoFunc(IndexTarballs, 2),
			Writer:      store.NewMuxingWriter(logsStore, tarnamesIndexedStore),
		},
	}
}
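The two glob patterns above presumably drive store.NewGlobReader to enumerate matching tarball paths on disk. A standard-library sketch of that enumeration for the "all" layout (listHealthTarballs is a hypothetical helper, and the assumption that the glob reader simply expands the pattern is mine):

import "path/filepath"

// listHealthTarballs returns the tarball paths matching the "all" layout
// pattern used by ScanLogTarballs above.
func listHealthTarballs(tarballsPath string) ([]string, error) {
	pattern := filepath.Join(tarballsPath, "all", "health", "*", "*", "health_*.tar.gz")
	return filepath.Glob(pattern)
}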
// IndexTarballsPipeline scans trace tarballs under tarballsPath, records their
// filenames, and indexes tarballs that have not been indexed yet into the
// traces store.
func IndexTarballsPipeline(tarballsPath string, levelDbManager store.Manager) transformer.Pipeline {
	tarballsPattern := filepath.Join(tarballsPath, "*", "*", "*.tar.gz")
	tarnamesStore := levelDbManager.ReadingWriter("tarnames")
	tarnamesIndexedStore := levelDbManager.ReadingWriter("tarnames-indexed")
	tracesStore := levelDbManager.Writer("traces")
	return []transformer.PipelineStage{
		transformer.PipelineStage{
			Name:   "ScanTraceTarballs",
			Reader: store.NewGlobReader(tarballsPattern),
			Writer: tarnamesStore,
		},
		transformer.PipelineStage{
			Name:        "IndexTraces",
			Transformer: transformer.MakeMultipleOutputsGroupDoFunc(IndexTarballs, 2),
			Reader:      store.NewDemuxingReader(tarnamesStore, tarnamesIndexedStore),
			Writer:      store.NewMuxingWriter(tracesStore, tarnamesIndexedStore),
		},
	}
}
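In both indexing pipelines, MakeMultipleOutputsGroupDoFunc(IndexTarballs, 2) is paired with store.NewMuxingWriter(...) so the transformer can emit records to two output stores from a single stage. A hypothetical sketch of how a multiplexing writer might route records by output index follows; MuxedRecord and muxingWriter are illustrative types, not the store package's actual implementation.

// MuxedRecord is a hypothetical record tagged with the index of the output
// store it should be written to (0 for the first writer, 1 for the second).
type MuxedRecord struct {
	OutputIndex int
	Key         []byte
	Value       []byte
}

// muxingWriter fans tagged records out to one of several underlying writers.
type muxingWriter struct {
	outputs []func(key, value []byte) error
}

func (w muxingWriter) Write(records []MuxedRecord) error {
	for _, record := range records {
		// Route each record to the writer selected by its output index.
		if err := w.outputs[record.OutputIndex](record.Key, record.Value); err != nil {
			return err
		}
	}
	return nil
}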
// BytesPerDomainPipeline builds the pipeline that attributes traffic volume to
// DNS domains: it maps newly available traces into DNS, flow, and size tables,
// joins A and CNAME records with MAC addresses and the domain whitelist, joins
// the whitelisted domains with flows and their byte counts, and flattens the
// totals per device and per timestamp before exporting them to Postgres.
func BytesPerDomainPipeline(levelDbManager store.Manager, bytesPerDomainPostgresStore store.Writer) transformer.Pipeline {
	tracesStore := levelDbManager.Seeker("traces")
	availabilityIntervalsStore := levelDbManager.Seeker("consistent-ranges")
	traceKeyRangesStore := levelDbManager.ReadingDeleter("bytesperdomain-trace-key-ranges")
	consolidatedTraceKeyRangesStore := levelDbManager.ReadingDeleter("bytesperdomain-consolidated-trace-key-ranges")
	addressIdTableStore := levelDbManager.SeekingWriter("bytesperdomain-address-id-table")
	aRecordTableStore := levelDbManager.SeekingWriter("bytesperdomain-a-record-table")
	cnameRecordTableStore := levelDbManager.SeekingWriter("bytesperdomain-cname-record-table")
	flowIpsTableStore := levelDbManager.SeekingWriter("bytesperdomain-flow-ips-table")
	addressIpTableStore := levelDbManager.SeekingWriter("bytesperdomain-address-ip-table")
	bytesPerTimestampShardedStore := levelDbManager.SeekingWriter("bytesperdomain-bytes-per-timestamp-sharded")
	whitelistStore := levelDbManager.SeekingWriter("bytesperdomain-whitelist")
	aRecordsWithMacStore := levelDbManager.SeekingWriter("bytesperdomain-a-records-with-mac")
	cnameRecordsWithMacStore := levelDbManager.SeekingWriter("bytesperdomain-cname-records-with-mac")
	allDnsMappingsStore := levelDbManager.SeekingWriter("bytesperdomain-all-dns-mappings")
	allWhitelistedMappingsStore := levelDbManager.SeekingWriter("bytesperdomain-all-whitelisted-mappings")
	flowMacsTableStore := levelDbManager.SeekingWriter("bytesperdomain-flow-macs-table")
	flowDomainsTableStore := levelDbManager.SeekingWriter("bytesperdomain-flow-domains-table")
	flowDomainsGroupedTableStore := levelDbManager.SeekingWriter("bytesperdomain-flow-domains-grouped-table")
	bytesPerDomainShardedStore := levelDbManager.ReadingWriter("bytesperdomain-bytes-per-domain-sharded")
	bytesPerDomainPerDeviceStore := levelDbManager.ReadingWriter("bytesperdomain-bytes-per-domain-per-device")
	bytesPerDomainStore := levelDbManager.ReadingWriter("bytesperdomain-bytes-per-domain")
	sessionsStore := levelDbManager.ReadingDeleter("bytesperdomain-sessions")
	// excludeOldSessions keeps only records whose key prefix matches a session
	// recorded in sessionsStore.
	excludeOldSessions := func(stor store.Seeker) store.Seeker {
		return store.NewPrefixIncludingReader(stor, sessionsStore)
	}
	// Read only traces that fall inside the consistent availability intervals
	// and are not already covered by a processed trace key range.
	newTracesStore := store.NewRangeExcludingReader(store.NewRangeIncludingReader(tracesStore, availabilityIntervalsStore), traceKeyRangesStore)
	return append([]transformer.PipelineStage{
		transformer.PipelineStage{
			Name:        "BytesPerDomainMapper",
			Reader:      newTracesStore,
			Transformer: transformer.MakeMultipleOutputsDoFunc(bytesPerDomainMapper, 7),
			Writer:      store.NewMuxingWriter(addressIdTableStore, aRecordTableStore, cnameRecordTableStore, flowIpsTableStore, addressIpTableStore, bytesPerTimestampShardedStore, whitelistStore),
		},
		SessionPipelineStage(newTracesStore, sessionsStore),
		transformer.PipelineStage{
			Name:        "JoinAAddressIdsWithMacAddresses",
			Reader:      excludeOldSessions(store.NewDemuxingSeeker(addressIdTableStore, aRecordTableStore)),
			Transformer: transformer.TransformFunc(joinAddressIdsWithMacAddresses),
			Writer:      aRecordsWithMacStore,
		},
		transformer.PipelineStage{
			Name:        "JoinCnameAddressIdsWithMacAddresses",
			Reader:      excludeOldSessions(store.NewDemuxingSeeker(addressIdTableStore, cnameRecordTableStore)),
			Transformer: transformer.TransformFunc(joinAddressIdsWithMacAddresses),
			Writer:      cnameRecordsWithMacStore,
		},
		transformer.PipelineStage{
			Name:        "JoinARecordsWithCnameRecords",
			Reader:      excludeOldSessions(store.NewDemuxingSeeker(aRecordsWithMacStore, cnameRecordsWithMacStore)),
			Transformer: transformer.TransformFunc(joinARecordsWithCnameRecords),
			Writer:      allDnsMappingsStore,
		},
		transformer.PipelineStage{
			Name:        "EmitARecords",
			Reader:      excludeOldSessions(aRecordsWithMacStore),
			Transformer: transformer.MakeDoFunc(emitARecords),
			Writer:      allDnsMappingsStore,
		},
		transformer.PipelineStage{
			Name:        "JoinDomainsWithWhitelist",
			Reader:      excludeOldSessions(store.NewDemuxingSeeker(whitelistStore, allDnsMappingsStore)),
			Transformer: transformer.TransformFunc(joinDomainsWithWhitelist),
			Writer:      allWhitelistedMappingsStore,
		},
		transformer.PipelineStage{
			Name:        "JoinMacWithFlowId",
			Reader:      excludeOldSessions(store.NewDemuxingSeeker(addressIpTableStore, flowIpsTableStore)),
			Transformer: transformer.TransformFunc(joinMacWithFlowId),
			Writer:      flowMacsTableStore,
		},
		transformer.PipelineStage{
			Name:        "JoinWhitelistedDomainsWithFlows",
			Reader:      excludeOldSessions(store.NewDemuxingSeeker(allWhitelistedMappingsStore, flowMacsTableStore)),
			Transformer: transformer.TransformFunc(joinWhitelistedDomainsWithFlows),
			Writer:      flowDomainsTableStore,
		},
		transformer.PipelineStage{
			Name:        "GroupDomainsAndMacAddresses",
			Reader:      excludeOldSessions(flowDomainsTableStore),
			Transformer: transformer.TransformFunc(groupDomainsAndMacAddresses),
			Writer:      flowDomainsGroupedTableStore,
		},
		transformer.PipelineStage{
			Name:        "JoinDomainsWithSizes",
			Reader:      excludeOldSessions(store.NewDemuxingSeeker(flowDomainsGroupedTableStore, bytesPerTimestampShardedStore)),
			Transformer: transformer.TransformFunc(joinDomainsWithSizes),
			Writer:      bytesPerDomainShardedStore,
		},
		transformer.PipelineStage{
			Name:        "FlattenIntoBytesPerDevice",
			Reader:      bytesPerDomainShardedStore,
			Transformer: transformer.TransformFunc(flattenIntoBytesPerDevice),
			Writer:      bytesPerDomainPerDeviceStore,
		},
		transformer.PipelineStage{
			Name:        "FlattenIntoBytesPerTimestamp",
			Reader:      bytesPerDomainShardedStore,
			Transformer: transformer.TransformFunc(flattenIntoBytesPerTimestamp),
			Writer:      bytesPerDomainStore,
		},
		transformer.PipelineStage{
			Name:   "BytesPerDomainPostgresStore",
			Reader: bytesPerDomainStore,
			Writer: bytesPerDomainPostgresStore,
		},
	}, TraceKeyRangesPipeline(newTracesStore, traceKeyRangesStore, consolidatedTraceKeyRangesStore)...)
}
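Most of the join stages above read two key-sorted stores through store.NewDemuxingSeeker and combine records that share a key. A minimal, hypothetical sketch of that kind of sorted merge join over two key-sorted slices is shown below; kv, mergeJoin, and the byte-slice keys are illustrative and not the store package's API.

import "bytes"

// kv is a hypothetical key/value pair from a key-sorted store.
type kv struct {
	Key, Value []byte
}

// mergeJoin walks two key-sorted slices in lockstep and calls emit whenever
// the current records' keys match, the core idea behind joining two demuxed
// stores on a shared key.
func mergeJoin(left, right []kv, emit func(key, leftValue, rightValue []byte)) {
	i, j := 0, 0
	for i < len(left) && j < len(right) {
		switch bytes.Compare(left[i].Key, right[j].Key) {
		case -1:
			i++
		case 1:
			j++
		default:
			emit(left[i].Key, left[i].Value, right[j].Value)
			i++
			j++
		}
	}
}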