func monthlySummaryTaskHandler(w http.ResponseWriter, r *http.Request) { month, year, err := FormValueMonthDefaultToPrev(r) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } countByUser := false now := date.NowInPdt() start := time.Date(int(year), time.Month(month), 1, 0, 0, 0, 0, now.Location()) end := start.AddDate(0, 1, 0).Add(-1 * time.Second) bucketname := "serfr0-reports" filename := start.Format("summary-2006-01.txt") ctx := req2ctx(r) if exists, err := gcs.Exists(ctx, bucketname, filename); err != nil { http.Error(w, fmt.Sprintf("gcs.Exists=%v for gs://%s/%s (err=%v)", exists, bucketname, filename, err), http.StatusInternalServerError) return } else if exists { w.Header().Set("Content-Type", "text/plain") w.Write([]byte(fmt.Sprintf("OK!\nGCS file %s/%s already exists\n", bucketname, filename))) return } tStart := time.Now() str, err := SummaryReport(r, start, end, countByUser) if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } gcsHandle, err := gcs.OpenRW(ctx, bucketname, filename, "text/plain") if err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } gcsHandle.IOWriter().Write([]byte(str)) if err := gcsHandle.Close(); err != nil { http.Error(w, err.Error(), http.StatusInternalServerError) return } w.Header().Set("Content-Type", "text/plain") w.Write([]byte(fmt.Sprintf("OK!\nGCS monthly report %s/%s written, took %s", bucketname, filename, time.Since(tStart)))) }
func generateMonthlyCSV(cdb complaintdb.ComplaintDB, month, year int) (string, int, error) { ctx := cdb.Ctx() bucketname := "serfr0-reports" now := date.NowInPdt() s := time.Date(int(year), time.Month(month), 1, 0, 0, 0, 0, now.Location()) e := s.AddDate(0, 1, 0).Add(-1 * time.Second) log.Infof(ctx, "Starting /be/month: %s", s) // One time, at 00:00, for each day of the given month days := date.IntermediateMidnights(s.Add(-1*time.Second), e) filename := s.Format("complaints-20060102") + e.Format("-20060102.csv") gcsName := "gs://" + bucketname + "/" + filename if exists, err := gcs.Exists(ctx, bucketname, filename); err != nil { return gcsName, 0, fmt.Errorf("gcs.Exists=%v for gs://%s/%s (err=%v)", exists, bucketname, filename, err) } else if exists { return gcsName, 0, nil } gcsHandle, err := gcs.OpenRW(ctx, bucketname, filename, "text/plain") if err != nil { return gcsName, 0, err } csvWriter := csv.NewWriter(gcsHandle.IOWriter()) cols := []string{ "CallerCode", "Name", "Address", "Zip", "Email", "HomeLat", "HomeLong", "UnixEpoch", "Date", "Time(PDT)", "Notes", "ActivityDisturbed", "Flightnumber", "Notes", // Column names above are incorrect, but BKSV are used to them. // //"CallerCode", "Name", "Address", "Zip", "Email", "HomeLat", "HomeLong", //"UnixEpoch", "Date", "Time(PDT)", "Notes", "Flightnumber", //"ActivityDisturbed", "CcSFO", } csvWriter.Write(cols) tStart := time.Now() n := 0 for _, dayStart := range days { dayEnd := dayStart.AddDate(0, 0, 1).Add(-1 * time.Second) log.Infof(ctx, " /be/month: %s - %s", dayStart, dayEnd) tIter := time.Now() iter := cdb.NewLongBatchingIter(cdb.QueryInSpan(dayStart, dayEnd)) for { c, err := iter.NextWithErr() if err != nil { return gcsName, 0, fmt.Errorf("iterator failed after %s (%s): %v", err, time.Since(tIter), time.Since(tStart)) } if c == nil { break } r := []string{ c.Profile.CallerCode, c.Profile.FullName, c.Profile.Address, c.Profile.StructuredAddress.Zip, c.Profile.EmailAddress, fmt.Sprintf("%.4f", c.Profile.Lat), fmt.Sprintf("%.4f", c.Profile.Long), fmt.Sprintf("%d", c.Timestamp.UTC().Unix()), c.Timestamp.Format("2006/01/02"), c.Timestamp.Format("15:04:05"), c.Description, c.AircraftOverhead.FlightNumber, c.Activity, fmt.Sprintf("%v", c.Profile.CcSfo), } if err := csvWriter.Write(r); err != nil { return gcsName, 0, err } n++ } } csvWriter.Flush() if err := gcsHandle.Close(); err != nil { return gcsName, 0, err } log.Infof(ctx, "monthly CSV successfully written to %s, %d rows", gcsName, n) return gcsName, n, nil }
// Returns number of records written (which is zero if the file already exists) func writeAnonymizedGCSFile(r *http.Request, datestring, foldername, filename string) (int, error) { ctx := req2ctx(r) cdb := complaintdb.NewDB(ctx) // Get a list of users that as of right now, have opted out of data sharing. optOutUsers, err := cdb.GetComplainersCurrentlyOptedOut() if err != nil { return 0, fmt.Errorf("get optout users: %v", err) } if exists, err := gcs.Exists(ctx, foldername, filename); err != nil { return 0, err } else if exists { return 0, nil } gcsHandle, err := gcs.OpenRW(ctx, foldername, filename, "application/json") if err != nil { return 0, err } encoder := json.NewEncoder(gcsHandle.IOWriter()) s := date.Datestring2MidnightPdt(datestring) e := s.AddDate(0, 0, 1).Add(-1 * time.Second) // +23:59:59 (or 22:59 or 24:59 when going in/out DST) n := 0 // An iterator expires after 60s, no matter what; so carve up into short-lived iterators for _, dayWindow := range DayWindows(s, e) { iter := cdb.NewLongBatchingIter(cdb.QueryInSpan(dayWindow[0], dayWindow[1])) for { c, err := iter.NextWithErr() if err != nil { return 0, fmt.Errorf("iterator [%s,%s] failed at %s: %v", dayWindow[0], dayWindow[1], time.Now(), err) } else if c == nil { break // we're all done with this iterator } // If the user is currently opted out, ignore their data if _, exists := optOutUsers[c.Profile.EmailAddress]; exists { continue } n++ ac := complaintdb.AnonymizeComplaint(c) if err := encoder.Encode(ac); err != nil { return 0, err } } } if err := gcsHandle.Close(); err != nil { return 0, err } log.Infof(ctx, "GCS bigquery file '%s' successfully written", filename) return n, nil }