Example #1
0
// WriteByteOffs generates a kv file at fp. The i-th key-value pair is taken
// from buffer: the key is buffer[keyOffs[i]:keyEnds[i]] and the value is
// buffer[valOffs[i]:valEnds[i]]. Each key and each value is written as its
// byte length, encoded as a sophie.VInt, followed by the raw bytes.
//
// One pair is written per element of keyOffs; keyEnds, valOffs and valEnds
// are indexed with the same positions, so they must be at least as long as
// keyOffs or indexing them panics.
//
// The first error from creating, writing or closing the file is returned.
func WriteByteOffs(fp sophie.FsPath, buffer []byte,
	keyOffs, keyEnds, valOffs, valEnds []int) (err error) {
	writer, err := fp.Create()
	if err != nil {
		return err
	}
	// A failed Close can mean the written data never reached the file, so
	// report it unless an earlier error is already being returned.
	defer func() {
		if cerr := writer.Close(); err == nil {
			err = cerr
		}
	}()

	for i, keyOff := range keyOffs {
		keyEnd, valOff, valEnd := keyEnds[i], valOffs[i], valEnds[i]
		// Key: length prefix, then the bytes.
		if err := sophie.VInt(keyEnd - keyOff).WriteTo(writer); err != nil {
			return err
		}
		if _, err := writer.Write(buffer[keyOff:keyEnd]); err != nil {
			return err
		}
		// Value: length prefix, then the bytes.
		if err := sophie.VInt(valEnd - valOff).WriteTo(writer); err != nil {
			return err
		}
		if _, err := writer.Write(buffer[valOff:valEnd]); err != nil {
			return err
		}
	}
	return nil
}
Example #2
0
func TestReduceValues(t *testing.T) {
	/*
	 * Source are of two parts with nothing in each, but at each mapend, a pair
	 * of <"part", <part>> is collected. So the reducer will check whether a key
	 * of "part" with two different values are reduced.
	 */
	job := MrJob{
		Source: []Input{
			&InputStruct{
				PartCountF: func() (int, error) {
					return 2, nil
				},
			},
		},

		NewMapperF: func(src, part int) Mapper {
			return &MapperStruct{
				MapEndF: func(c PartCollector) error {
					return c.CollectTo(0, sophie.RawString("part"),
						sophie.VInt(part))
				},
			}
		},

		NewReducerF: func(part int) Reducer {
			st := make(map[sophie.VInt]bool)
			return &ReducerStruct{
				NewKeyF: sophie.NewRawString,
				NewValF: sophie.NewVInt,

				ReduceF: func(key sophie.SophieWriter,
					nextVal SophierIterator, c []sophie.Collector) error {

					keyStr := string(*key.(*sophie.RawString))
					if keyStr != "part" {
						return errors.New(`Key should be "part"`)
					}
					for {
						val, err := nextVal()
						if err == sophie.EOF {
							break
						}
						if err != nil {
							return err
						}

						part := *val.(*sophie.VInt)
						if st[part] {
							t.Errorf("Duplicated value: %v", part)
						}
						st[part] = true
					}
					return nil
				},
			}
		},
	}
	assert.NoErrorf(t, "job.Run failed: %v", job.Run())
}
Example #3
0
// WriteTo serializes c to w by writing, in order, ScheduleTime as a
// sophie.Time, Version as a sophie.VInt and Etag as a sophie.String. It stops
// at the first failing write and returns that error.
func (c *CrawlingEntry) WriteTo(w sophie.Writer) error {
	err := sophie.Time(c.ScheduleTime).WriteTo(w)
	if err != nil {
		return err
	}
	err = sophie.VInt(c.Version).WriteTo(w)
	if err != nil {
		return err
	}
	// The last field's result is the result of the whole call.
	return sophie.String(c.Etag).WriteTo(w)
}
Example #4
0
// Collect writes one key-value pair to kvw.writer. It is part of the
// sophie.CollectCloser interface.
//
// The key and then the value are each serialized into kvw.objBuf, which is
// reset and reused for both, and emitted as a sophie.VInt byte length
// followed by the serialized bytes.
//
// The first error encountered is returned. A serialization error is reported
// before anything is written for that object; note that if the value fails,
// the key has already been written.
func (kvw *Writer) Collect(key, val sophie.SophieWriter) error {
	// write key
	kvw.objBuf.Reset()
	if err := key.WriteTo(&kvw.objBuf); err != nil {
		return err
	}
	if err := sophie.VInt(kvw.objBuf.Len()).WriteTo(kvw.writer); err != nil {
		return err
	}
	if _, err := kvw.writer.Write(kvw.objBuf.Bytes()); err != nil {
		return err
	}
	// write val
	kvw.objBuf.Reset()
	if err := val.WriteTo(&kvw.objBuf); err != nil {
		return err
	}
	if err := sophie.VInt(kvw.objBuf.Len()).WriteTo(kvw.writer); err != nil {
		return err
	}
	if _, err := kvw.writer.Write(kvw.objBuf.Bytes()); err != nil {
		return err
	}
	// success
	return nil
}