Пример #1
0
// monthlySummaryTaskHandler generates the summary report for one calendar
// month and stores it as a text object in the "serfr0-reports" bucket.
//
// The month and year are taken from FormValueMonthDefaultToPrev(r). If the
// target object already exists, the handler says so and does no further work;
// otherwise it builds the report with SummaryReport and writes it out. Every
// failure is reported to the client as an HTTP 500.
func monthlySummaryTaskHandler(w http.ResponseWriter, r *http.Request) {
	month, year, err := FormValueMonthDefaultToPrev(r)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	countByUser := false

	// The report covers [first second of the month, last second of the month],
	// in the location reported by date.NowInPdt().
	now := date.NowInPdt()
	start := time.Date(int(year), time.Month(month), 1, 0, 0, 0, 0, now.Location())
	end := start.AddDate(0, 1, 0).Add(-1 * time.Second)

	bucketname := "serfr0-reports"
	filename := start.Format("summary-2006-01.txt")
	ctx := req2ctx(r)

	// Skip the report generation entirely if the object is already there.
	if exists, err := gcs.Exists(ctx, bucketname, filename); err != nil {
		http.Error(w, fmt.Sprintf("gcs.Exists=%v for gs://%s/%s (err=%v)", exists,
			bucketname, filename, err), http.StatusInternalServerError)
		return
	} else if exists {
		w.Header().Set("Content-Type", "text/plain")
		fmt.Fprintf(w, "OK!\nGCS file %s/%s already exists\n", bucketname, filename)
		return
	}

	tStart := time.Now()
	str, err := SummaryReport(r, start, end, countByUser)
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	gcsHandle, err := gcs.OpenRW(ctx, bucketname, filename, "text/plain")
	if err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	// A failed write must not be reported as success.
	// NOTE(review): the handle is left unclosed on this path, as on the other
	// error paths; presumably Close would commit a partial object - confirm.
	if _, err := gcsHandle.IOWriter().Write([]byte(str)); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}
	if err := gcsHandle.Close(); err != nil {
		http.Error(w, err.Error(), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "text/plain")
	fmt.Fprintf(w, "OK!\nGCS monthly report %s/%s written, took %s",
		bucketname, filename, time.Since(tStart))
}
Пример #2
0
// generateMonthlyCSV writes every complaint from the given calendar month as
// a CSV object in the "serfr0-reports" bucket. Month boundaries are computed
// in the location reported by date.NowInPdt().
//
// It returns the gs:// name of the object, the number of complaint rows
// written (the header row is not counted), and an error. If the object
// already exists, nothing is written and the row count is zero. On any error
// the row count is zero.
func generateMonthlyCSV(cdb complaintdb.ComplaintDB, month, year int) (string, int, error) {
	ctx := cdb.Ctx()
	bucketname := "serfr0-reports"

	now := date.NowInPdt()
	s := time.Date(year, time.Month(month), 1, 0, 0, 0, 0, now.Location())
	e := s.AddDate(0, 1, 0).Add(-1 * time.Second)
	log.Infof(ctx, "Starting /be/month: %s", s)

	// One time, at 00:00, for each day of the given month
	days := date.IntermediateMidnights(s.Add(-1*time.Second), e)

	filename := s.Format("complaints-20060102") + e.Format("-20060102.csv")

	gcsName := "gs://" + bucketname + "/" + filename

	if exists, err := gcs.Exists(ctx, bucketname, filename); err != nil {
		return gcsName, 0, fmt.Errorf("gcs.Exists=%v for gs://%s/%s (err=%v)", exists, bucketname, filename, err)
	} else if exists {
		return gcsName, 0, nil
	}

	gcsHandle, err := gcs.OpenRW(ctx, bucketname, filename, "text/plain")
	if err != nil {
		return gcsName, 0, err
	}
	csvWriter := csv.NewWriter(gcsHandle.IOWriter())

	cols := []string{
		"CallerCode", "Name", "Address", "Zip", "Email", "HomeLat", "HomeLong",
		"UnixEpoch", "Date", "Time(PDT)", "Notes", "ActivityDisturbed", "Flightnumber",
		"Notes",
		// Column names above are incorrect, but BKSV are used to them.
		//
		//"CallerCode", "Name", "Address", "Zip", "Email", "HomeLat", "HomeLong",
		//"UnixEpoch", "Date", "Time(PDT)", "Notes", "Flightnumber",
		//"ActivityDisturbed", "CcSFO",
	}
	if err := csvWriter.Write(cols); err != nil {
		return gcsName, 0, err
	}

	tStart := time.Now()
	n := 0
	// Query one day at a time, with a fresh iterator per day.
	for _, dayStart := range days {
		dayEnd := dayStart.AddDate(0, 0, 1).Add(-1 * time.Second)
		log.Infof(ctx, " /be/month: %s - %s", dayStart, dayEnd)

		tIter := time.Now()
		iter := cdb.NewLongBatchingIter(cdb.QueryInSpan(dayStart, dayEnd))
		for {
			c, err := iter.NextWithErr()
			if err != nil {
				// Arguments follow the verbs: time in this iterator, total
				// time since the export started, then the error itself.
				return gcsName, 0, fmt.Errorf("iterator failed after %s (%s): %v",
					time.Since(tIter), time.Since(tStart), err)
			}
			if c == nil {
				break
			}

			r := []string{
				c.Profile.CallerCode,
				c.Profile.FullName,
				c.Profile.Address,
				c.Profile.StructuredAddress.Zip,
				c.Profile.EmailAddress,
				fmt.Sprintf("%.4f", c.Profile.Lat),
				fmt.Sprintf("%.4f", c.Profile.Long),

				fmt.Sprintf("%d", c.Timestamp.UTC().Unix()),
				c.Timestamp.Format("2006/01/02"),
				c.Timestamp.Format("15:04:05"),
				c.Description,
				c.AircraftOverhead.FlightNumber,
				c.Activity,
				fmt.Sprintf("%v", c.Profile.CcSfo),
			}

			if err := csvWriter.Write(r); err != nil {
				return gcsName, 0, err
			}

			n++
		}
	}

	// Flush reports nothing itself; a failure while flushing the buffered rows
	// is only visible through Error().
	csvWriter.Flush()
	if err := csvWriter.Error(); err != nil {
		return gcsName, 0, err
	}

	if err := gcsHandle.Close(); err != nil {
		return gcsName, 0, err
	}

	log.Infof(ctx, "monthly CSV successfully written to %s, %d rows", gcsName, n)

	return gcsName, n, nil
}
Пример #3
0
// writeAnonymizedGCSFile streams the anonymized complaints for the day named
// by datestring into the object foldername/filename, one JSON value per
// complaint. Complaints whose profile email appears in the current opt-out
// list are left out.
//
// It returns the number of records written, which is zero if the file already
// exists. On any error the returned count is zero.
func writeAnonymizedGCSFile(r *http.Request, datestring, foldername, filename string) (int, error) {
	ctx := req2ctx(r)
	cdb := complaintdb.NewDB(ctx)

	// Snapshot of the users who, as of right now, have opted out of data sharing.
	optedOut, err := cdb.GetComplainersCurrentlyOptedOut()
	if err != nil {
		return 0, fmt.Errorf("get optout users: %v", err)
	}

	alreadyThere, err := gcs.Exists(ctx, foldername, filename)
	if err != nil {
		return 0, err
	}
	if alreadyThere {
		return 0, nil
	}

	handle, err := gcs.OpenRW(ctx, foldername, filename, "application/json")
	if err != nil {
		return 0, err
	}
	enc := json.NewEncoder(handle.IOWriter())

	// The span ends one second before the next midnight: +23:59:59 (or 22:59
	// or 24:59 when going in/out of DST).
	dayStart := date.Datestring2MidnightPdt(datestring)
	dayEnd := dayStart.AddDate(0, 0, 1).Add(-1 * time.Second)

	written := 0
	// An iterator expires after 60s, no matter what, so the day is carved up
	// into windows that each get their own short-lived iterator.
	for _, window := range DayWindows(dayStart, dayEnd) {
		from, to := window[0], window[1]
		iter := cdb.NewLongBatchingIter(cdb.QueryInSpan(from, to))

		for {
			complaint, err := iter.NextWithErr()
			if err != nil {
				return 0, fmt.Errorf("iterator [%s,%s] failed at %s: %v",
					from, to, time.Now(), err)
			}
			if complaint == nil {
				// This window's iterator is exhausted.
				break
			}

			// Users who are currently opted out contribute nothing.
			if _, skip := optedOut[complaint.Profile.EmailAddress]; skip {
				continue
			}

			written++
			if err := enc.Encode(complaintdb.AnonymizeComplaint(complaint)); err != nil {
				return 0, err
			}
		}
	}

	if err := handle.Close(); err != nil {
		return 0, err
	}

	log.Infof(ctx, "GCS bigquery file '%s' successfully written", filename)

	return written, nil
}