// Run is the block's main loop. Here we listen on the different channels we set up. func (b *ParseCSV) Run() { var tree *jee.TokenTree var path string var err error var headers []string var csvReader *csv.Reader for { select { case ruleI := <-b.inrule: // set a parameter of the block path, err = util.ParseString(ruleI, "Path") if err != nil { b.Error(err) continue } token, err := jee.Lexer(path) if err != nil { b.Error(err) continue } tree, err = jee.Parser(token) if err != nil { b.Error(err) continue } headers, err = util.ParseArrayString(ruleI, "Headers") if err != nil { b.Error(err) continue } case <-b.quit: // quit the block return case msg := <-b.in: // deal with inbound data if tree == nil { continue } var data string dataI, err := jee.Eval(tree, msg) if err != nil { b.Error(err) continue } switch value := dataI.(type) { case []byte: data = string(value[:]) case string: data = value default: b.Error("data should be a string or a []byte") continue } csvReader = csv.NewReader(strings.NewReader(data)) csvReader.TrimLeadingSpace = true // allow records to have variable numbers of fields csvReader.FieldsPerRecord = -1 case <-b.inpoll: if csvReader == nil { b.Error("this block needs data to be pollable") break } record, err := csvReader.Read() if err != nil && err != io.EOF { b.Error(err) continue } row := make(map[string]interface{}) for fieldIndex, field := range record { if fieldIndex >= len(headers) { row[strconv.Itoa(fieldIndex)] = field } else { header := headers[fieldIndex] row[header] = field } } b.out <- row case MsgChan := <-b.queryrule: // deal with a query request MsgChan <- map[string]interface{}{ "Path": path, "Headers": headers, } } } }
func ImportDictionaries() map[string][]*models.SuggestItem { var itemMap = make(map[string][]*models.SuggestItem) fileInfo, err := ioutil.ReadDir(DataDirectory) if err != nil { log.Print(err) return itemMap } numberOfDictionaries := 0 for _, file := range fileInfo { if !file.IsDir() && (strings.HasSuffix(file.Name(), ".txt") || strings.HasSuffix(file.Name(), ".txt.gz")) { dictionaryFile := fmt.Sprintf("%s%s%s", DataDirectory, string(os.PathSeparator), file.Name()) dictionaryName := strings.TrimSuffix(strings.TrimSuffix(file.Name(), ".gz"), ".txt") log.Printf("Importing dictionary %s from file %s", dictionaryName, dictionaryFile) csvFile, err := os.Open(dictionaryFile) if err != nil { log.Print(err) continue } defer csvFile.Close() var csvReader *csv.Reader if strings.HasSuffix(file.Name(), ".txt.gz") { gzipReader, gzerr := gzip.NewReader(csvFile) if gzerr == nil { defer gzipReader.Close() csvReader = csv.NewReader(gzipReader) } else { log.Print(gzerr) continue } } else { csvReader = csv.NewReader(csvFile) } csvReader.FieldsPerRecord = 2 csvReader.Comma = '|' csvReader.LazyQuotes = true csvReader.TrimLeadingSpace = true rawCSVdata, err := csvReader.ReadAll() if err != nil { log.Print(err) continue } for _, each := range rawCSVdata { var suggestItem = new(models.SuggestItem) suggestItem.Term = each[0] weight, err := strconv.Atoi(each[1]) if err == nil { suggestItem.Weight = weight itemMap[dictionaryName] = append(itemMap[dictionaryName], suggestItem) } } numberOfDictionaries++ } } log.Printf("Imported %d dictionaries", numberOfDictionaries) return itemMap }