// Test_SequentialDataLoader writes a small set of instances to a temporary
// data file, reads them back through the DataInstanceAccessor interface, and
// checks that:
//   - every instance comes back in file order and equals what was written,
//   - the read following the last instance yields io.EOF,
//   - after Reset the next read succeeds again.
//
// Each line of the fixture is "<pos_y>\t<neg_y>" followed by one
// "\t<index>:<value>" pair per element of x.
func Test_SequentialDataLoader(t *testing.T) {
	const (
		testFile          = "test_instances.dat"
		numFeatureClasses = 6
	)
	testCases := []DataInstance{
		{
			[]int{0, 1, 2, 3, 4, 5},
			1,
			2,
		}, {
			[]int{1, 2, 3, 4, 5, 0},
			0,
			1,
		},
	}

	err := util.WithNewOpenFileAsBufioWriter(testFile,
		func(w *bufio.Writer) error {
			for _, tc := range testCases {
				if _, err := fmt.Fprintf(w, "%d\t%d", tc.pos_y, tc.neg_y); err != nil {
					return err
				}
				for i, v := range tc.x {
					if _, err := fmt.Fprintf(w, "\t%d:%d", i, v); err != nil {
						return err
					}
				}
				if _, err := fmt.Fprint(w, "\n"); err != nil {
					return err
				}
			}
			return nil
		})
	// Registered before the error check so that a partially written file is
	// cleaned up too; t.Fatalf still runs deferred calls.
	defer os.Remove(testFile)
	if err != nil {
		// Nothing below is meaningful without the fixture, so stop here.
		t.Fatalf("Failed to create test file: %s:%s.", testFile, err)
	}

	loader := NewInstanceLoader(testFile, numFeatureClasses)
	defer loader.Close()

	// Exercise the loader strictly through the accessor interface.
	var accessor DataInstanceAccessor = loader
	for i, tc := range testCases {
		instance, err := accessor.NextInstance()
		if err != nil {
			// Reads are sequential: once one fails, later comparisons would
			// only produce misleading follow-up errors.
			t.Fatalf("TestCase: #%d: %s.", i, err)
		}
		if !instance.Equal(&tc) {
			t.Errorf("TestCase: #%d: Expected %v but got %v.", i, tc, instance)
		}
	}

	// All instances consumed: the next read must signal end of data.
	if _, err = accessor.NextInstance(); err != io.EOF {
		t.Errorf("Expected EOF but got %v.", err)
	}

	// After Reset the accessor must be readable again.
	accessor.Reset()
	if _, err = accessor.NextInstance(); err != nil {
		t.Errorf("Expected instance but got error: %v.", err)
	}
}
Beispiel #2
0
// GenerateBigramModel generates a bigram model from the information collected
// so far and saves it to the given file. The file consists of one line per
// bigram:
//
//	<first term> <second term> <frequency>
//
// where frequency is the bigram's count divided by g.biGramCount. Lines are
// written in map iteration order, so their order is not stable between runs.
//
// NOTE(review): the previous comment described the value as
// P(first term | second term), i.e. the probability of the second term
// immediately following the first. The code divides by g.biGramCount rather
// than by a per-term count; confirm which quantity is intended.
//
// It returns the first write error encountered, or the error reported by
// util.WithNewOpenFileAsBufioWriter.
func (g *NGramGenerator) GenerateBigramModel(filename string) error {
	modelWriter := func(w *bufio.Writer) error {
		total := float64(g.biGramCount)
		for k, c := range g.biGram {
			//TODO(weidoliang): add smoothing
			freq := float64(c) / total
			// Surface write failures instead of silently producing a
			// truncated model file.
			if _, err := fmt.Fprintf(w, "%s %s %f\n", k.First, k.Second, freq); err != nil {
				return err
			}
		}
		return nil
	}
	return util.WithNewOpenFileAsBufioWriter(filename, modelWriter)
}
Beispiel #3
0
// GenerateUnigramModel generates a unigram model from the information
// collected so far and saves it to the given file. The file consists of one
// line per unigram:
//
//	<unigram> <frequency>
//
// where frequency is the unigram's count divided by g.uniGramCount. Lines are
// written in map iteration order, so their order is not stable between runs.
//
// It returns the first write error encountered, or the error reported by
// util.WithNewOpenFileAsBufioWriter.
func (g *NGramGenerator) GenerateUnigramModel(filename string) error {
	modelWriter := func(w *bufio.Writer) error {
		total := float64(g.uniGramCount)
		for k, c := range g.uniGram {
			//TODO(weidoliang): add smoothing to avoid zero probabilities
			freq := float64(c) / total
			// Surface write failures instead of silently producing a
			// truncated model file.
			if _, err := fmt.Fprintf(w, "%s %f\n", k, freq); err != nil {
				return err
			}
		}
		return nil
	}
	return util.WithNewOpenFileAsBufioWriter(filename, modelWriter)
}
// saveDataToFile writes data to filename, one instance per line. Each line is
// "<pos_y>\t<neg_y>" followed by one "\t<index>:<value>" pair for every
// element of the instance's x slice, and is terminated by a newline.
//
// It returns the first write error encountered, or the error reported by
// util.WithNewOpenFileAsBufioWriter.
func saveDataToFile(filename string, data []DataInstance) error {
	writeInstances := func(w *bufio.Writer) error {
		for _, d := range data {
			if _, err := fmt.Fprintf(w, "%d\t%d", d.pos_y, d.neg_y); err != nil {
				return err
			}
			for i, v := range d.x {
				if _, err := fmt.Fprintf(w, "\t%d:%d", i, v); err != nil {
					return err
				}
			}
			if _, err := fmt.Fprint(w, "\n"); err != nil {
				return err
			}
		}
		return nil
	}
	return util.WithNewOpenFileAsBufioWriter(filename, writeInstances)
}
Beispiel #5
0
// TestCEDict writes a few dictionary lines of the form
//
//	<traditional> <simplified> [<pinyin syllables>] /<english>/<english>/...
//
// to a temporary file, loads them with LoadCEDict(dictPath, TRADITION_CHINESE),
// and verifies for a sample of entries that looking up the traditional form
// yields the expected alternative (simplified) form, pinyin syllables and
// English glosses.
func TestCEDict(t *testing.T) {
	lines := []string{
		"AA制 AA制 [A A zhi4] /to split the bill/to go Dutch/",
		"A咖 A咖 [A ka1] /class \"A\"/top grade/",
		"A片 A片 [A pian4] /adult movie/pornography/",
		"B型超聲 B型超声 [B xing2 chao1 sheng1] /type-B ultrasound/",
		"B超 B超 [B chao1] /type-B ultrasound/abbr. for B型超聲|B型超声[B xing2 chao1 sheng1]/",
		"C盤 C盘 [C pan2] /C drive or default startup drive (computing)/",
		"DNA鑒定 DNA鉴定 [D N A jian4 ding4] /DNA test/DNA testing/",
		"E仔 E仔 [e zai3] /MDMA (C11H15NO2)/",
		"G點 G点 [G dian3] /Gräfenberg Spot/G-Spot/",
		"K仔 K仔 [K zai3] /ketamine (slang)/",
	}
	dictResult := []struct {
		traditional string
		simplified  string
		pinyin      []string
		english     []string
	}{
		{"AA制", "AA制", []string{"A", "A", "zhi4"}, []string{"to split the bill", "to go Dutch"}},
		{"A咖", "A咖", []string{"A", "ka1"}, []string{"class \"A\"", "top grade"}},
		{"E仔", "E仔", []string{"e", "zai3"}, []string{"MDMA (C11H15NO2)"}},
	}

	dictPath := "cedict_test.txt"
	lineWriter := func(writer *bufio.Writer) error {
		for _, line := range lines {
			if _, err := writer.WriteString(line + "\n"); err != nil {
				t.Errorf("Failed to write line[%s] to file [%s]", line, dictPath)
				return err
			}
		}
		return nil
	}
	err := util.WithNewOpenFileAsBufioWriter(dictPath, lineWriter)
	// Registered before the error check so a partially written file is removed
	// as well; t.Fatalf still runs deferred calls.
	defer os.Remove(dictPath)
	if err != nil {
		t.Fatalf("Failed to create test file %s: %v", dictPath, err)
	}

	dict, err := LoadCEDict(dictPath, TRADITION_CHINESE)
	if err != nil {
		// Without a loaded dictionary every lookup below would be meaningless
		// (or panic), so abort instead of continuing.
		t.Fatalf("Failed at LoadCEDict(%s): %v", dictPath, err)
	}

	for _, tc := range dictResult {
		term := dict.Lookup(tc.traditional)
		if term == nil {
			t.Errorf("Failed to Lookup key %s", tc.traditional)
			// Skip the field checks: dereferencing a nil term would panic and
			// hide the results of the remaining cases.
			continue
		}
		if term.Alternative != tc.simplified {
			t.Errorf("Lookup returns incorrent alternative representation, expected [%v] but got [%v]",
				tc.simplified, term.Alternative)
		}
		if !AreStringSlicesEqual(term.Pinyin, tc.pinyin) {
			t.Errorf("Lookup returns incorrect pinyin representation. Expected [%v] but got [%v]",
				tc.pinyin, term.Pinyin)
		}
		if !AreStringSlicesEqual(term.English, tc.english) {
			t.Errorf("Lookup returns invalid english representation expected [%v] but got [%v]",
				tc.english, term.English)
		}
	}
}