Exemplo n.º 1
0
// See ingestion.Processor interface.
func (p *pdfProcessor) Process(resultsFile ingestion.ResultFileLocation) error {
	r, err := resultsFile.Open()
	if err != nil {
		return err
	}

	dmResults, err := goldingestion.ParseDMResultsFromReader(r)
	if err != nil {
		return err
	}

	// Get the results in this file that have produced a PDF.
	pdfResults := getPDFResults(dmResults)

	// If there are no PDF results we are done here.
	if len(pdfResults) == 0 {
		return nil
	}

	// Get the output name.
	outFile := resultsFile.Name()

	// check if they have been generated already (the JSON file exists)
	if p.resultExists(p.outJsonBucket, p.outJsonDir, outFile) {
		return nil
	}

	return p.rasterizeAndUpload(outFile, dmResults, pdfResults)
}
func TestPDFProcessor(t *testing.T) {
	testutils.SkipIfShort(t)

	// Get the service account client from meta data or a local config file.
	client, err := auth.NewJWTServiceAccountClient("", auth.DEFAULT_JWT_FILENAME, nil, storage.ScopeFullControl)
	assert.Nil(t, err)

	cacheDir, err := fileutil.EnsureDirExists(CACHE_DIR)
	assert.Nil(t, err)

	// Clean up after the test.
	defer func() {
		defer util.RemoveAll(cacheDir)
		deleteFolderContent(t, TEST_BUCKET, IMAGES_OUT_DIR, client)
		deleteFolderContent(t, TEST_BUCKET, JSON_OUT_DIR, client)
	}()

	// Configure the processor.
	ingesterConf := &sharedconfig.IngesterConfig{
		ExtraParams: map[string]string{
			CONFIG_INPUT_IMAGES_BUCKET:  TEST_BUCKET,
			CONFIG_INPUT_IMAGES_DIR:     IMAGES_IN_DIR,
			CONFIG_OUTPUT_JSON_BUCKET:   TEST_BUCKET,
			CONFIG_OUTPUT_JSON_DIR:      JSON_OUT_DIR,
			CONFIG_OUTPUT_IMAGES_BUCKET: TEST_BUCKET,
			CONFIG_OUTPUT_IMAGES_DIR:    IMAGES_OUT_DIR,
			CONFIG_PDF_CACHEDIR:         cacheDir,
		},
	}
	processor, err := newPDFProcessor(nil, ingesterConf, client)
	assert.Nil(t, err)

	// Load the example file and process it.
	fsResult, err := ingestion.FileSystemResult(TEST_INGESTION_FILE, "./")
	assert.Nil(t, err)

	err = processor.Process(fsResult)
	assert.Nil(t, err)

	// Fetch the json output and parse it.
	pProcessor := processor.(*pdfProcessor)

	// download the result.
	resultFileName := filepath.Join(CACHE_DIR, "result-file.json")
	assert.Nil(t, pProcessor.download(TEST_BUCKET, JSON_OUT_DIR, fsResult.Name(), resultFileName))

	// Make sure we get the expected result.
	fsResult, err = ingestion.FileSystemResult(TEST_INGESTION_FILE, "./")
	assert.Nil(t, err)
	r, err := fsResult.Open()
	assert.Nil(t, err)
	fsDMResults, err := goldingestion.ParseDMResultsFromReader(r)
	assert.Nil(t, err)

	foundResult, err := ingestion.FileSystemResult(resultFileName, "./")
	assert.Nil(t, err)
	r, err = foundResult.Open()
	assert.Nil(t, err)
	foundDMResults, err := goldingestion.ParseDMResultsFromReader(r)
	assert.Nil(t, err)

	dmResult1 := *fsDMResults
	dmResult2 := *foundDMResults
	dmResult1.Results = nil
	dmResult2.Results = nil
	assert.Equal(t, dmResult1, dmResult2)

	foundIdx := 0
	srcResults := fsDMResults.Results
	tgtResults := foundDMResults.Results
	for _, result := range srcResults {
		assert.True(t, foundIdx < len(tgtResults))
		if result.Options["ext"] == "pdf" {
			for ; (foundIdx < len(tgtResults)) && (result.Key["name"] == tgtResults[foundIdx].Key["name"]); foundIdx++ {
				assert.True(t, tgtResults[foundIdx].Key["rasterizer"] != "")
				delete(tgtResults[foundIdx].Key, "rasterizer")
				assert.Equal(t, result.Key, tgtResults[foundIdx].Key)
				assert.Equal(t, "png", tgtResults[foundIdx].Options["ext"])
			}
		}
	}
	assert.Equal(t, len(foundDMResults.Results), foundIdx)
}