// WriteByteOffs generates a kv file with key-value pairs represented as a // slice of buffer and some int slices of key offsets, key ends, value offsets, // and value ends. func WriteByteOffs(fp sophie.FsPath, buffer []byte, keyOffs, keyEnds, valOffs, valEnds []int) error { writer, err := fp.Create() if err != nil { return err } defer writer.Close() for i, keyOff := range keyOffs { keyEnd, valOff, valEnd := keyEnds[i], valOffs[i], valEnds[i] if err := sophie.VInt(keyEnd - keyOff).WriteTo(writer); err != nil { return err } if _, err := writer.Write(buffer[keyOff:keyEnd]); err != nil { return err } if err := sophie.VInt(valEnd - valOff).WriteTo(writer); err != nil { return err } if _, err := writer.Write(buffer[valOff:valEnd]); err != nil { return err } } return nil }
func TestReduceValues(t *testing.T) { /* * Source are of two parts with nothing in each, but at each mapend, a pair * of <"part", <part>> is collected. So the reducer will check whether a key * of "part" with two different values are reduced. */ job := MrJob{ Source: []Input{ &InputStruct{ PartCountF: func() (int, error) { return 2, nil }, }, }, NewMapperF: func(src, part int) Mapper { return &MapperStruct{ MapEndF: func(c PartCollector) error { return c.CollectTo(0, sophie.RawString("part"), sophie.VInt(part)) }, } }, NewReducerF: func(part int) Reducer { st := make(map[sophie.VInt]bool) return &ReducerStruct{ NewKeyF: sophie.NewRawString, NewValF: sophie.NewVInt, ReduceF: func(key sophie.SophieWriter, nextVal SophierIterator, c []sophie.Collector) error { keyStr := string(*key.(*sophie.RawString)) if keyStr != "part" { return errors.New(`Key should be "part"`) } for { val, err := nextVal() if err == sophie.EOF { break } if err != nil { return err } part := *val.(*sophie.VInt) if st[part] { t.Errorf("Duplicated value: %v", part) } st[part] = true } return nil }, } }, } assert.NoErrorf(t, "job.Run failed: %v", job.Run()) }
func (c *CrawlingEntry) WriteTo(w sophie.Writer) error { if err := sophie.Time(c.ScheduleTime).WriteTo(w); err != nil { return err } if err := sophie.VInt(c.Version).WriteTo(w); err != nil { return err } if err := sophie.String(c.Etag).WriteTo(w); err != nil { return err } return nil }
// sophie.CollectCloser interface func (kvw *Writer) Collect(key, val sophie.SophieWriter) error { // write key kvw.objBuf.Reset() key.WriteTo(&kvw.objBuf) if err := sophie.VInt(kvw.objBuf.Len()).WriteTo(kvw.writer); err != nil { return err } if _, err := kvw.writer.Write(kvw.objBuf.Bytes()); err != nil { return err } // write val kvw.objBuf.Reset() val.WriteTo(&kvw.objBuf) if err := sophie.VInt(kvw.objBuf.Len()).WriteTo(kvw.writer); err != nil { return err } if _, err := kvw.writer.Write(kvw.objBuf.Bytes()); err != nil { return err } // success return nil }