Example #1
0
// Run is the block's main loop. Here we listen on the different channels we set up.
func (b *ParseCSV) Run() {
	var tree *jee.TokenTree
	var path string
	var err error
	var headers []string
	var csvReader *csv.Reader

	for {
		select {
		case ruleI := <-b.inrule:
			// set a parameter of the block
			path, err = util.ParseString(ruleI, "Path")
			if err != nil {
				b.Error(err)
				continue
			}
			token, err := jee.Lexer(path)
			if err != nil {
				b.Error(err)
				continue
			}
			tree, err = jee.Parser(token)
			if err != nil {
				b.Error(err)
				continue
			}

			headers, err = util.ParseArrayString(ruleI, "Headers")
			if err != nil {
				b.Error(err)
				continue
			}
		case <-b.quit:
			// quit the block
			return
		case msg := <-b.in:
			// deal with inbound data
			if tree == nil {
				continue
			}
			var data string

			dataI, err := jee.Eval(tree, msg)
			if err != nil {
				b.Error(err)
				continue
			}

			switch value := dataI.(type) {
			case []byte:
				data = string(value[:])

			case string:
				data = value

			default:
				b.Error("data should be a string or a []byte")
				continue
			}

			csvReader = csv.NewReader(strings.NewReader(data))
			csvReader.TrimLeadingSpace = true
			// allow records to have variable numbers of fields
			csvReader.FieldsPerRecord = -1

		case <-b.inpoll:
			if csvReader == nil {
				b.Error("this block needs data to be pollable")
				break
			}
			record, err := csvReader.Read()
			if err != nil && err != io.EOF {
				b.Error(err)
				continue
			}
			row := make(map[string]interface{})
			for fieldIndex, field := range record {
				if fieldIndex >= len(headers) {
					row[strconv.Itoa(fieldIndex)] = field
				} else {
					header := headers[fieldIndex]
					row[header] = field
				}
			}

			b.out <- row

		case MsgChan := <-b.queryrule:
			// deal with a query request
			MsgChan <- map[string]interface{}{
				"Path":    path,
				"Headers": headers,
			}
		}
	}
}
Example #2
0
func ImportDictionaries() map[string][]*models.SuggestItem {
	var itemMap = make(map[string][]*models.SuggestItem)

	fileInfo, err := ioutil.ReadDir(DataDirectory)
	if err != nil {
		log.Print(err)
		return itemMap
	}
	numberOfDictionaries := 0
	for _, file := range fileInfo {
		if !file.IsDir() && (strings.HasSuffix(file.Name(), ".txt") || strings.HasSuffix(file.Name(), ".txt.gz")) {
			dictionaryFile := fmt.Sprintf("%s%s%s", DataDirectory, string(os.PathSeparator), file.Name())
			dictionaryName := strings.TrimSuffix(strings.TrimSuffix(file.Name(), ".gz"), ".txt")
			log.Printf("Importing dictionary %s from file %s", dictionaryName, dictionaryFile)

			csvFile, err := os.Open(dictionaryFile)
			if err != nil {
				log.Print(err)
				continue
			}
			defer csvFile.Close()
			var csvReader *csv.Reader
			if strings.HasSuffix(file.Name(), ".txt.gz") {
				gzipReader, gzerr := gzip.NewReader(csvFile)
				if gzerr == nil {
					defer gzipReader.Close()
					csvReader = csv.NewReader(gzipReader)
				} else {
					log.Print(gzerr)
					continue
				}
			} else {
				csvReader = csv.NewReader(csvFile)
			}

			csvReader.FieldsPerRecord = 2
			csvReader.Comma = '|'
			csvReader.LazyQuotes = true
			csvReader.TrimLeadingSpace = true

			rawCSVdata, err := csvReader.ReadAll()
			if err != nil {
				log.Print(err)
				continue
			}

			for _, each := range rawCSVdata {
				var suggestItem = new(models.SuggestItem)
				suggestItem.Term = each[0]
				weight, err := strconv.Atoi(each[1])
				if err == nil {
					suggestItem.Weight = weight
					itemMap[dictionaryName] = append(itemMap[dictionaryName], suggestItem)
				}

			}
			numberOfDictionaries++
		}
	}

	log.Printf("Imported %d dictionaries", numberOfDictionaries)
	return itemMap
}