// init connects to the default data models service, fetches revision
// "2.0.1" of the "i2b2_pedsnet" model and stores its "i2b2" table in the
// table variable declared outside this function.
//
// Any failure is reported on stderr and terminates the process with exit
// status 1, since nothing that relies on table can work without it.
func init() {
	c, err := dms.New(dms.DefaultServiceURL)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error initializing client: %s\n", err)
		os.Exit(1)
	}

	// The fetch error must not be dropped: on failure the returned model is
	// not usable and dereferencing it below would panic.
	model, err := c.ModelRevision("i2b2_pedsnet", "2.0.1")
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error fetching model revision: %s\n", err)
		os.Exit(1)
	}

	// Fail fast when the model has no such table instead of leaving table
	// nil for later code to trip over.
	table = model.Tables.Get("i2b2")
	if table == nil {
		fmt.Fprintf(os.Stderr, "Error: table 'i2b2' not found in model\n")
		os.Exit(1)
	}
}
Ejemplo n.º 2
0
// main is the entry point of the validator command.
//
// It parses the command-line flags, resolves the requested model revision
// from the data models service, and validates every input given as a
// positional argument against the matching table of that model. An input is
// written as "path" or "path:table"; without the ":table" suffix the table
// name is the base file name up to its first ".".
//
// For each input, row-level and field-level issues are printed as tables on
// stdout. The process exits with status 1 when any input could not be
// validated (unknown table, unreadable file, header or data read failure) or
// when any validation issue was found.
func main() {
	var (
		service   string
		modelName string
		version   string
		delim     string
		compr     string
	)

	flag.StringVar(&modelName, "model", "", "The model to validate against. Required.")
	flag.StringVar(&version, "version", "", "The specific version of the model to validate against. Defaults to the latest version of the model.")
	flag.StringVar(&service, "service", dms.DefaultServiceURL, "The data models service to use for fetching schema information.")

	// NOTE(review): delim is parsed but never passed on to the validator in
	// this function, so the flag currently has no visible effect.
	flag.StringVar(&delim, "delim", ",", "The delimiter used in the input files or stream.")
	flag.StringVar(&compr, "compr", "", "The compression method used on the input files or stream. If ommitted the file extension will be used to infer the compression method: .gz, .gzip, .bzip2, .bz2.")

	flag.Parse()

	// Check required options.
	if modelName == "" {
		fmt.Println("A model must be specified.")
		os.Exit(1)
	}

	inputs := flag.Args()

	if len(inputs) == 0 {
		fmt.Println("At least one input must be specified.")
		os.Exit(1)
	}

	// Initialize data models client for service.
	c, err := dms.New(service)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	if err = c.Ping(); err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	revisions, err := c.ModelRevisions(modelName)
	if err != nil {
		fmt.Println(err)
		os.Exit(1)
	}

	var model *dms.Model

	if version == "" {
		// No explicit version requested: use the latest revision.
		model = revisions.Latest()
	} else {
		// Look for the requested version, remembering the versions seen so
		// they can be offered as choices if there is no match.
		var versions []string

		for _, rev := range revisions.List() {
			if rev.Version == version {
				model = rev
				break
			}

			versions = append(versions, rev.Version)
		}

		if model == nil {
			fmt.Printf("Invalid version for '%s'. Choose from: %s\n", modelName, strings.Join(versions, ", "))
			os.Exit(1)
		}
	}

	fmt.Printf("Validating against model '%s/%s'\n", model.Name, model.Version)

	// hasErrors records whether any input failed to validate or produced
	// validation issues; it decides the exit status.
	var hasErrors bool

	for _, name := range inputs {
		var tableName string

		// The file name may have a suffix containing the table name, name[:table].
		// The fallback is to use the file name without the extension.
		toks := strings.SplitN(name, ":", 2)
		name = toks[0]

		if len(toks) == 2 {
			tableName = toks[1]
		} else {
			tableName = strings.SplitN(filepath.Base(name), ".", 2)[0]
		}

		table := model.Tables.Get(tableName)
		if table == nil {
			// An input that cannot be matched to a table was not validated.
			hasErrors = true
			fmt.Printf("* Unknown table '%s'.\nChoices are: %s\n", tableName, strings.Join(model.Tables.Names(), ", "))
			continue
		}

		fmt.Printf("* Evaluating '%s' table in '%s'...\n", tableName, name)

		// Open the reader.
		reader, err := validator.Open(name, compr)
		if err != nil {
			hasErrors = true
			fmt.Printf("* Could not open file: %s\n", err)
			continue
		}

		v := validator.New(reader, table)

		if err = v.Init(); err != nil {
			hasErrors = true
			fmt.Printf("* Problem reading CSV header: %s\n", err)
			reader.Close()
			continue
		}

		// A read failure is reported, but whatever was collected before the
		// failure is still shown below.
		runErr := v.Run()
		if runErr != nil {
			hasErrors = true
			fmt.Printf("* Problem reading CSV data: %s\n", runErr)
		}

		reader.Close()

		// Build the result.
		result := v.Result()

		lerrs := result.LineErrors()

		if len(lerrs) > 0 {
			hasErrors = true

			fmt.Println("* Row-level issues were found.")

			// Row level issues.
			rowTable := tablewriter.NewWriter(os.Stdout)

			rowTable.SetHeader([]string{
				"code",
				"error",
				"occurrences",
				"lines",
				"example",
			})

			var lines, example string

			for verr, verrs := range lerrs {
				// The first occurrence serves as the example.
				first := verrs[0]

				if first.Context != nil {
					example = fmt.Sprintf("line %d: `%v` %v", first.Line, first.Value, first.Context)
				} else {
					example = fmt.Sprintf("line %d: `%v`", first.Line, first.Value)
				}

				steps := errLineSteps(verrs)

				// Show at most ten line entries and summarize the rest.
				if len(steps) > 10 {
					lines = fmt.Sprintf("%s ... (%d more)", strings.Join(steps[:10], ", "), len(steps[10:]))
				} else {
					lines = strings.Join(steps, ", ")
				}

				rowTable.Append([]string{
					fmt.Sprint(verr.Code),
					verr.Description,
					fmt.Sprint(len(verrs)),
					lines,
					example,
				})
			}

			rowTable.Render()
		}

		// Field level issues.
		fieldTable := tablewriter.NewWriter(os.Stdout)

		fieldTable.SetHeader([]string{
			"field",
			"code",
			"error",
			"occurrences",
			"lines",
			"samples",
		})

		var nerrs int

		// Output the error occurrence per field.
		for _, f := range v.Header {
			errmap := result.FieldErrors(f)

			if len(errmap) == 0 {
				continue
			}

			nerrs += len(errmap)

			var (
				lines  string
				sample []*validator.ValidationError
			)

			for verr, verrs := range errmap {
				num := len(verrs)

				if num > sampleSize {
					// Randomly pick sampleSize distinct occurrences so the
					// same error is never listed twice in the samples.
					sample = make([]*validator.ValidationError, 0, sampleSize)
					seen := make(map[int]struct{}, sampleSize)

					for len(sample) < sampleSize {
						j := rand.Intn(num)

						if _, dup := seen[j]; dup {
							continue
						}

						seen[j] = struct{}{}
						sample = append(sample, verrs[j])
					}
				} else {
					// Few enough occurrences to show them all.
					sample = verrs
				}

				sstrings := make([]string, len(sample))

				// %v matches the row-level output above and, unlike %s,
				// renders non-string values without "%!s(...)" noise.
				for i, ve := range sample {
					if ve.Context != nil {
						sstrings[i] = fmt.Sprintf("line %d: `%v` %v", ve.Line, ve.Value, ve.Context)
					} else {
						sstrings[i] = fmt.Sprintf("line %d: `%v`", ve.Line, ve.Value)
					}
				}

				steps := errLineSteps(verrs)

				// Show at most ten line entries and summarize the rest.
				if len(steps) > 10 {
					lines = fmt.Sprintf("%s ... (%d more)", strings.Join(steps[:10], ", "), len(steps[10:]))
				} else {
					lines = strings.Join(steps, ", ")
				}

				fieldTable.Append([]string{
					f,
					fmt.Sprint(verr.Code),
					verr.Description,
					fmt.Sprint(num),
					lines,
					strings.Join(sstrings, "\n"),
				})
			}
		}

		switch {
		case nerrs > 0:
			hasErrors = true
			fmt.Println("* Field-level issues were found.")
			fieldTable.Render()
		case len(lerrs) == 0 && runErr == nil:
			// Only claim success when the data was read completely and no
			// issue of either kind was recorded.
			fmt.Println("* Everything looks good!")
		}
	}

	if hasErrors {
		os.Exit(1)
	}
}