func init() { c, err := dms.New(dms.DefaultServiceURL) if err != nil { fmt.Fprintf(os.Stderr, "Error initializing client: %s\n", err) os.Exit(1) } model, _ := c.ModelRevision("i2b2_pedsnet", "2.0.1") table = model.Tables.Get("i2b2") }
func main() { var ( service string modelName string version string delim string compr string ) flag.StringVar(&modelName, "model", "", "The model to validate against. Required.") flag.StringVar(&version, "version", "", "The specific version of the model to validate against. Defaults to the latest version of the model.") flag.StringVar(&service, "service", dms.DefaultServiceURL, "The data models service to use for fetching schema information.") flag.StringVar(&delim, "delim", ",", "The delimiter used in the input files or stream.") flag.StringVar(&compr, "compr", "", "The compression method used on the input files or stream. If ommitted the file extension will be used to infer the compression method: .gz, .gzip, .bzip2, .bz2.") flag.Parse() // Check required options. if modelName == "" { fmt.Println("A model must be specified.") os.Exit(1) } inputs := flag.Args() if len(inputs) == 0 { fmt.Println("At least one input must be specified.") os.Exit(1) } // Initialize data models client for service. c, err := dms.New(service) if err != nil { fmt.Println(err) os.Exit(1) } if err = c.Ping(); err != nil { fmt.Println(err) os.Exit(1) } revisions, err := c.ModelRevisions(modelName) if err != nil { fmt.Println(err) os.Exit(1) } var model *dms.Model // Get the latest version. if version == "" { model = revisions.Latest() } else { var ( versions []string _model *dms.Model ) for _, _model = range revisions.List() { if _model.Version == version { model = _model break } versions = append(versions, _model.Version) } if model == nil { fmt.Printf("Invalid version for '%s'. Choose from: %s\n", modelName, strings.Join(versions, ", ")) os.Exit(1) } } fmt.Printf("Validating against model '%s/%s'\n", model.Name, model.Version) var ( hasErrors bool tableName string table *dms.Table ) for _, name := range inputs { // The file name may have a suffix containing the table name, name[:table]. // The fallback is to use the file name without the extension. toks := strings.SplitN(name, ":", 2) if len(toks) == 2 { name = toks[0] tableName = toks[1] } else { name = toks[0] toks = strings.SplitN(filepath.Base(name), ".", 2) tableName = toks[0] } if table = model.Tables.Get(tableName); table == nil { fmt.Printf("* Unknown table '%s'.\nChoices are: %s\n", tableName, strings.Join(model.Tables.Names(), ", ")) continue } fmt.Printf("* Evaluating '%s' table in '%s'...\n", tableName, name) // Open the reader. reader, err := validator.Open(name, compr) if err != nil { fmt.Printf("* Could not open file: %s\n", err) continue } v := validator.New(reader, table) if err = v.Init(); err != nil { fmt.Printf("* Problem reading CSV header: %s\n", err) reader.Close() continue } if err = v.Run(); err != nil { fmt.Printf("* Problem reading CSV data: %s\n", err) } reader.Close() // Build the result. result := v.Result() lerrs := result.LineErrors() if len(lerrs) > 0 { hasErrors = true fmt.Println("* Row-level issues were found.") // Row level issues. tw := tablewriter.NewWriter(os.Stdout) tw.SetHeader([]string{ "code", "error", "occurrences", "lines", "example", }) var lines, example string for err, verrs := range result.LineErrors() { ve := verrs[0] if ve.Context != nil { example = fmt.Sprintf("line %d: `%v` %v", ve.Line, ve.Value, ve.Context) } else { example = fmt.Sprintf("line %d: `%v`", ve.Line, ve.Value) } errsteps := errLineSteps(verrs) if len(errsteps) > 10 { lines = fmt.Sprintf("%s ... (%d more)", strings.Join(errsteps[:10], ", "), len(errsteps[10:])) } else { lines = strings.Join(errsteps, ", ") } tw.Append([]string{ fmt.Sprint(err.Code), err.Description, fmt.Sprint(len(verrs)), lines, example, }) } tw.Render() } // Field level issues. tw := tablewriter.NewWriter(os.Stdout) tw.SetHeader([]string{ "field", "code", "error", "occurrences", "lines", "samples", }) var nerrs int // Output the error occurrence per field. for _, f := range v.Header { errmap := result.FieldErrors(f) if len(errmap) == 0 { continue } nerrs += len(errmap) var ( lines string sample []*validator.ValidationError ) for err, verrs := range errmap { num := len(verrs) if num >= sampleSize { sample = make([]*validator.ValidationError, sampleSize) // Randomly sample. for i, _ := range sample { j := rand.Intn(num) sample[i] = verrs[j] } } else { sample = verrs } sstrings := make([]string, len(sample)) for i, ve := range sample { if ve.Context != nil { sstrings[i] = fmt.Sprintf("line %d: `%s` %s", ve.Line, ve.Value, ve.Context) } else { sstrings[i] = fmt.Sprintf("line %d: `%s`", ve.Line, ve.Value) } } errsteps := errLineSteps(verrs) if len(errsteps) > 10 { lines = fmt.Sprintf("%s ... (%d more)", strings.Join(errsteps[:10], ", "), len(errsteps[10:])) } else { lines = strings.Join(errsteps, ", ") } tw.Append([]string{ f, fmt.Sprint(err.Code), err.Description, fmt.Sprint(num), lines, strings.Join(sstrings, "\n"), }) } } if nerrs > 0 { hasErrors = true fmt.Println("* Field-level issues were found.") tw.Render() } else if len(lerrs) == 0 { fmt.Println("* Everything looks good!") } } if hasErrors { os.Exit(1) } }