func (c *Log) reader() {
	defer c.wg.Done()

	for s := range c.chreader {
		s.lock.Lock()

		// Reset the bufferIO managers
		s.data.Reset()

		// Move to the next offset
		c.current += 1
		c.current = c.current % c.numsegments

		if 0 == c.current {
			c.stats.Wrapped()
			c.wrapped = true
		}

		s.offset = int64(c.current) * int64(c.segmentsize)

		if c.wrapped {
			start := time.Now()
			n, err := c.fp.ReadAt(s.segmentbuf, s.offset)
			end := time.Now()
			c.stats.SegmentReadTimeRecord(end.Sub(start))
			godbc.Check(n == len(s.segmentbuf))
			godbc.Check(err == nil)
		}

		s.lock.Unlock()

		c.chavailable <- s
	}
}
func (l *Log) Start() {
	godbc.Require(l.size != 0)
	godbc.Require(l.Msgchan != nil)
	godbc.Require(l.chwriting != nil)
	godbc.Require(l.chavailable != nil)
	godbc.Require(l.chreader != nil)
	godbc.Require(l.segmentbuffers == len(l.segments))
	godbc.Require(l.segmentbuffers == len(l.chreader))
	godbc.Require(0 == len(l.chavailable))
	godbc.Require(0 == len(l.chwriting))

	// Set up the first available segment
	l.segment = <-l.chreader
	l.segment.offset = int64(l.current) * int64(l.segmentsize)
	if l.wrapped {
		n, err := l.fp.ReadAt(l.segment.segmentbuf, l.segment.offset)
		godbc.Check(n == len(l.segment.segmentbuf), n)
		godbc.Check(err == nil)
	}

	// Now that we are sure everything is clean,
	// we can start the goroutines
	for i := 0; i < 32; i++ {
		l.wg.Add(1)
		go l.logread()
	}
	go l.server()
	go l.writer()
	go l.reader()
	l.wg.Add(3)
}
func (c *Log) put(msg *message.Message) error {

	iopkt := msg.IoPkt()
	godbc.Require(iopkt.LogBlock < c.blocks)

	// Make sure the block number corresponds to the
	// current segment.  If not, c.sync() will place
	// the next available segment into c.segment
	for !c.inRange(iopkt.LogBlock, c.segment) {
		c.sync()
	}

	// Get the log offset
	offset := c.offset(iopkt.LogBlock)

	// Write to the current buffer
	n, err := c.segment.data.WriteAt(iopkt.Buffer, offset-c.segment.offset)
	godbc.Check(n == len(iopkt.Buffer))
	godbc.Check(err == nil)

	c.segment.written = true

	// We have written the data, and we are done with the message
	msg.Done()

	return err
}
// Return the hex string of a random 16-byte UUID
func GenUUID() string {
	uuid := make([]byte, 16)
	n, err := rand.Read(uuid)
	godbc.Check(n == len(uuid), n, len(uuid))
	godbc.Check(err == nil, err)

	return hex.EncodeToString(uuid)
}
func (a *Asu) ioAt(b []byte, offset int64, isread bool) (n int, err error) {
	godbc.Require(a.fpsize != 0)

	// Head
	head_fp := int(offset / a.fpsize)
	head_fp_off := int64(offset % a.fpsize)
	godbc.Check(head_fp < len(a.fps), head_fp, len(a.fps))

	// Tail
	tail_fp := int((offset + int64(len(b))) / (a.fpsize + 4*KB))
	godbc.Check(tail_fp < len(a.fps), tail_fp, len(a.fps), offset, len(b), a.fpsize)

	if head_fp == tail_fp {
		if isread {
			return a.fps[head_fp].ReadAt(b, head_fp_off)
		} else {
			return a.fps[head_fp].WriteAt(b, head_fp_off)
		}
	} else {
		var (
			wg                 sync.WaitGroup
			head_n, tail_n     int
			head_err, tail_err error
		)

		wg.Add(2)

		// I/O to the head file
		go func() {
			defer wg.Done()
			if isread {
				head_n, head_err = a.fps[head_fp].ReadAt(b[:a.fpsize-head_fp_off], head_fp_off)
			} else {
				head_n, head_err = a.fps[head_fp].WriteAt(b[:a.fpsize-head_fp_off], head_fp_off)
			}
		}()

		// I/O to the tail file
		go func() {
			defer wg.Done()
			if isread {
				tail_n, tail_err = a.fps[tail_fp].ReadAt(b[a.fpsize-head_fp_off:], 0)
			} else {
				tail_n, tail_err = a.fps[tail_fp].WriteAt(b[a.fpsize-head_fp_off:], 0)
			}
		}()
		wg.Wait()

		if head_err != nil {
			return head_n, head_err
		} else if tail_err != nil {
			return tail_n, tail_err
		} else {
			return head_n + tail_n, nil
		}
	}
}
func GenUUID() string {
	uuid := make([]byte, 16)
	n, err := rand.Read(uuid)
	godbc.Check(n == len(uuid), n, len(uuid))
	godbc.Check(err == nil, err)

	// Set the RFC 4122 variant and version bits, keeping the
	// remaining random bits of each byte intact
	uuid[8] = (uuid[8] & 0x3f) | 0x80 // variant 10xx, see section 4.1.1
	uuid[6] = (uuid[6] & 0x0f) | 0x40 // version 4 (pseudo-random), see section 4.1.3

	return hex.EncodeToString(uuid)
}
func (b *BrickEntry) DestroyCheck(db *bolt.DB, executor executors.Executor) error {
	godbc.Require(db != nil)
	godbc.Require(b.TpSize > 0)
	godbc.Require(b.Info.Size > 0)

	// Get node hostname
	var host string
	err := db.View(func(tx *bolt.Tx) error {
		node, err := NewNodeEntryFromId(tx, b.Info.NodeId)
		if err != nil {
			return err
		}

		host = node.ManageHostName()
		godbc.Check(host != "")
		return nil
	})
	if err != nil {
		return err
	}

	// Create request
	req := &executors.BrickRequest{}
	req.Name = b.Info.Id
	req.Size = b.Info.Size
	req.TpSize = b.TpSize
	req.VgId = b.Info.DeviceId

	// Check brick on node
	return executor.BrickDestroyCheck(host, req)
}
func (b *Brick) Destroy() error {
	godbc.Require(b.NodeId != "")
	godbc.Require(b.Path != "")
	godbc.Require(b.db != nil)

	// Just for now, it will work with https://github.com/lpabon/vagrant-gfsm
	sshexec := ssh.NewSshExecWithKeyFile("vagrant", "insecure_private_key")
	godbc.Check(sshexec != nil)

	// Get node name
	var nodename string
	err := b.db.Reader(func() error {
		nodename = b.db.nodes[b.NodeId].Info.Name
		return nil
	})
	if err != nil {
		return err
	}

	// Delete brick storage
	commands := []string{
		fmt.Sprintf("sudo umount /gluster/brick_%v", b.Id),
		fmt.Sprintf("sudo lvremove -f vg_%v/tp_%v", b.DeviceId, b.Id),
		fmt.Sprintf("sudo rmdir /gluster/brick_%v", b.Id),
	}

	_, err = sshexec.ConnectAndExec(nodename+":22", commands, nil)
	if err != nil {
		return err
	}

	return b.FreeStorage()
}
func (c *Log) writer() {
	defer c.wg.Done()

	for s := range c.chwriting {
		if s.written {
			start := time.Now()
			n, err := c.fp.WriteAt(s.segmentbuf, s.offset)
			end := time.Now()
			s.written = false

			c.stats.WriteTimeRecord(end.Sub(start))
			godbc.Check(n == len(s.segmentbuf))
			godbc.Check(err == nil)
		} else {
			c.stats.SegmentSkipped()
		}
		c.chreader <- s
	}
	close(c.chreader)
}
func (v *VolumeEntry) CreateGlusterVolume() error {

	// Get node name and build the volume create command
	var nodename string
	var cmd string

	err := v.db.Reader(func() error {
		nodename = v.db.nodes[v.State.Bricks[0].NodeId].Info.Name
		cmd = fmt.Sprintf("sudo gluster volume create %v replica %v ",
			v.Info.Name, v.State.Replica)
		for brick := range v.State.Bricks {
			cmd += fmt.Sprintf("%v:/gluster/brick_%v/brick ",
				v.db.nodes[v.State.Bricks[brick].NodeId].Info.Name,
				v.State.Bricks[brick].Id)
		}
		return nil
	})
	if err != nil {
		return err
	}

	// Create gluster volume command
	// :TODO: Add force for now.  It will allow silly bricks on the same systems
	// to work.  Please remove once we add the intelligent ring
	cmd += " force"

	// Just for now, it will work with https://github.com/lpabon/vagrant-gfsm
	sshexec := ssh.NewSshExecWithKeyFile("vagrant", "insecure_private_key")
	godbc.Check(sshexec != nil)

	// Create and start the volume
	commands := []string{
		cmd,
		fmt.Sprintf("sudo gluster volume start %v", v.Info.Name),
	}

	_, err = sshexec.ConnectAndExec(nodename+":22", commands, nil)
	if err != nil {
		return err
	}

	// Setup mount point
	v.Info.Mount = fmt.Sprintf("%v:%v", nodename, v.Info.Name)

	// State
	v.State.Created = true
	v.State.Started = true

	return nil
}
func (c *Log) logread() {
	defer c.wg.Done()
	for m := range c.logreaders {
		iopkt := m.IoPkt()
		offset := c.offset(iopkt.LogBlock)

		// Read from storage
		start := time.Now()
		n, err := c.fp.ReadAt(iopkt.Buffer, offset)
		end := time.Now()
		c.stats.ReadTimeRecord(end.Sub(start))

		godbc.Check(n == len(iopkt.Buffer))
		godbc.Check(err == nil)

		c.stats.StorageHit()

		// Save in buffer cache
		//c.bc.Set(offset, iopkt.Buffer)

		// Return to caller
		m.Done()
	}
}
func NewTestApp(dbfile string) *App {

	// Create simple configuration for unit tests
	appConfig := bytes.NewBuffer([]byte(`{
		"glusterfs" : {
			"executor" : "mock",
			"allocator" : "simple",
			"db" : "` + dbfile + `"
		}
	}`))
	app := NewApp(appConfig)
	godbc.Check(app != nil)

	return app
}
func (b *BrickEntry) Destroy(db *bolt.DB, executor executors.Executor) error {
	godbc.Require(db != nil)
	godbc.Require(b.TpSize > 0)
	godbc.Require(b.Info.Size > 0)

	if b.State != BRICK_STATE_ONLINE {
		return nil
	}

	// Get node hostname
	var host string
	err := db.View(func(tx *bolt.Tx) error {
		node, err := NewNodeEntryFromId(tx, b.Info.NodeId)
		if err != nil {
			return err
		}

		host = node.ManageHostName()
		godbc.Check(host != "")
		return nil
	})
	if err != nil {
		return err
	}

	// Create request
	req := &executors.BrickRequest{}
	req.Name = b.Info.Id
	req.Size = b.Info.Size
	req.TpSize = b.TpSize
	req.VgId = b.Info.DeviceId

	// Delete brick on node
	logger.Info("Deleting brick %v", b.Info.Id)
	err = executor.BrickDestroy(host, req)
	if err != nil {
		b.State = BRICK_STATE_FAILED
		return err
	}

	b.State = BRICK_STATE_DELETED

	godbc.Ensure(b.State == BRICK_STATE_DELETED)
	return nil
}
func (b *Brick) Create() error {
	godbc.Require(b.db != nil)
	godbc.Require(b.DeviceId != "")

	// Just for now, it will work with https://github.com/lpabon/vagrant-gfsm
	sshexec := ssh.NewSshExecWithKeyFile("vagrant", "insecure_private_key")
	godbc.Check(sshexec != nil)

	// Get node name
	var nodename string
	err := b.db.Reader(func() error {
		nodename = b.db.nodes[b.NodeId].Info.Name
		return nil
	})
	if err != nil {
		return err
	}

	// SSH into node and create the brick
	commands := []string{
		fmt.Sprintf("sudo lvcreate -L %vKiB -T vg_%v/tp_%v -V %vKiB -n brick_%v",
			// Thin pool size
			uint64(float64(b.Size)*THINP_SNAPSHOT_FACTOR),
			// Volume group
			b.DeviceId,
			// Thin pool name
			b.Id,
			// Volume size
			b.Size,
			// Logical volume name
			b.Id),
		fmt.Sprintf("sudo mkfs.xfs -i size=512 /dev/vg_%v/brick_%v", b.DeviceId, b.Id),
		fmt.Sprintf("sudo mkdir /gluster/brick_%v", b.Id),
		fmt.Sprintf("sudo mount /dev/vg_%v/brick_%v /gluster/brick_%v",
			b.DeviceId, b.Id, b.Id),
		fmt.Sprintf("sudo mkdir /gluster/brick_%v/brick", b.Id),
	}

	_, err = sshexec.ConnectAndExec(nodename+":22", commands, nil)
	if err != nil {
		return err
	}

	b.Path = fmt.Sprintf("/gluster/brick_%v", b.Id)

	return nil
}
func (m *GlusterFSPlugin) peerProbe(name string) error {

	// Just for now, it will work with https://github.com/lpabon/vagrant-gfsm
	sshexec := ssh.NewSshExecWithKeyFile("vagrant", "insecure_private_key")
	godbc.Check(sshexec != nil)

	// Create the commands
	commands := []string{
		fmt.Sprintf("sudo gluster peer probe %v", name),
	}

	_, err := sshexec.ConnectAndExec(m.peerHost+":22", commands, nil)
	if err != nil {
		return err
	}

	return nil
}
func (b *BrickEntry) Create(db *bolt.DB, executor executors.Executor) error {
	godbc.Require(db != nil)
	godbc.Require(b.TpSize > 0)
	godbc.Require(b.Info.Size > 0)

	// Get node hostname
	var host string
	err := db.View(func(tx *bolt.Tx) error {
		node, err := NewNodeEntryFromId(tx, b.Info.NodeId)
		if err != nil {
			return err
		}

		host = node.ManageHostName()
		godbc.Check(host != "")
		return nil
	})
	if err != nil {
		return err
	}

	// Create request
	req := &executors.BrickRequest{}
	req.Name = b.Info.Id
	req.Size = b.Info.Size
	req.TpSize = b.TpSize
	req.VgId = b.Info.DeviceId
	req.PoolMetadataSize = b.PoolMetadataSize

	// Create brick on node
	logger.Info("Creating brick %v", b.Info.Id)
	info, err := executor.BrickCreate(host, req)
	if err != nil {
		return err
	}
	b.Info.Path = info.Path
	b.State = BRICK_STATE_ONLINE

	godbc.Ensure(b.Info.Path != "")
	godbc.Ensure(b.State == BRICK_STATE_ONLINE)

	return nil
}
func (v *VolumeEntry) createVolumeRequest(db *bolt.DB,
	brick_entries []*BrickEntry) (*executors.VolumeRequest, string, error) {
	godbc.Require(db != nil)
	godbc.Require(brick_entries != nil)

	// Setup list of bricks
	vr := &executors.VolumeRequest{}
	vr.Bricks = make([]executors.BrickInfo, len(brick_entries))
	var sshhost string
	for i, b := range brick_entries {

		// Setup path
		vr.Bricks[i].Path = b.Info.Path

		// Get storage host name from Node entry
		err := db.View(func(tx *bolt.Tx) error {
			node, err := NewNodeEntryFromId(tx, b.Info.NodeId)
			if err != nil {
				return err
			}

			if sshhost == "" {
				sshhost = node.ManageHostName()
			}
			vr.Bricks[i].Host = node.StorageHostName()
			godbc.Check(vr.Bricks[i].Host != "")

			return nil
		})
		if err != nil {
			logger.Err(err)
			return nil, "", err
		}
	}

	// Setup volume information in the request
	vr.Name = v.Info.Name
	v.Durability.SetExecutorVolumeRequest(vr)

	return vr, sshhost, nil
}
func (v *VolumeEntry) Destroy() error {
	godbc.Require(v.db != nil)

	sshexec := ssh.NewSshExecWithKeyFile("vagrant", "insecure_private_key")
	godbc.Check(sshexec != nil)

	// Get node name
	var nodename string
	err := v.db.Reader(func() error {
		nodename = v.db.nodes[v.State.Bricks[0].NodeId].Info.Name
		return nil
	})
	if err != nil {
		return err
	}

	// Shutdown volume
	commands := []string{
		// Stop gluster volume
		fmt.Sprintf("yes | sudo gluster volume stop %v force", v.Info.Name),
		fmt.Sprintf("yes | sudo gluster volume delete %v", v.Info.Name),
	}
	_, err = sshexec.ConnectAndExec(nodename+":22", commands, nil)
	if err != nil {
		return errors.New("Unable to shutdown volume")
	}

	// Destroy bricks
	var wg sync.WaitGroup
	for brick := range v.State.Bricks {
		wg.Add(1)
		go func(b int) {
			defer wg.Done()
			v.State.Bricks[b].Destroy()
		}(brick)
	}
	wg.Wait()

	return nil
}
func (v *VolumeEntry) Expand(db *bolt.DB,
	executor executors.Executor,
	allocator Allocator,
	sizeGB int) (e error) {

	// Allocate new bricks in the cluster
	brick_entries, err := v.allocBricksInCluster(db, allocator, v.Info.Cluster, sizeGB)
	if err != nil {
		return err
	}

	// Setup cleanup function
	defer func() {
		if e != nil {
			logger.Debug("Error detected, cleaning up")

			// Remove from db
			db.Update(func(tx *bolt.Tx) error {
				for _, brick := range brick_entries {
					v.removeBrickFromDb(tx, brick)
				}
				err := v.Save(tx)
				godbc.Check(err == nil)
				return nil
			})
		}
	}()

	// Create bricks
	err = CreateBricks(db, executor, brick_entries)
	if err != nil {
		logger.Err(err)
		return err
	}

	// Setup cleanup function
	defer func() {
		if e != nil {
			logger.Debug("Error detected, cleaning up")
			DestroyBricks(db, executor, brick_entries)
		}
	}()

	// Create a volume request to send to the executor
	// so that it can add the new bricks
	vr, host, err := v.createVolumeRequest(db, brick_entries)
	if err != nil {
		return err
	}

	// Expand the volume
	_, err = executor.VolumeExpand(host, vr)
	if err != nil {
		return err
	}

	// Increase the recorded volume size
	v.Info.Size += sizeGB

	// Save volume entry
	err = db.Update(func(tx *bolt.Tx) error {

		// Save brick entries
		for _, brick := range brick_entries {
			err := brick.Save(tx)
			if err != nil {
				return err
			}
		}

		return v.Save(tx)
	})

	return err
}
func read(fp io.ReaderAt,
	c *cache.CacheMap,
	devid, offset, blocks uint32,
	buffer []byte) {

	godbc.Require(len(buffer)%(4*KB) == 0)

	here := make(chan *message.Message, blocks)
	cacheoffset := cache.Address64(cache.Address{Devid: devid, Block: offset})
	msg := message.NewMsgGet()
	msg.RetChan = here

	iopkt := msg.IoPkt()
	iopkt.Buffer = buffer
	iopkt.Address = cacheoffset
	iopkt.Blocks = blocks

	msgs := 0
	hitpkt, err := c.Get(msg)
	if err != nil {
		//fmt.Printf("|blocks:%d::hits:0--", blocks)
		// None found.  Read the whole thing from the backend
		fp.ReadAt(buffer, int64(offset)*4*KB)

		m := message.NewMsgPut()
		m.RetChan = here

		io := m.IoPkt()
		io.Address = cacheoffset
		io.Buffer = buffer
		io.Blocks = blocks
		c.Put(m)
		msgs++
	} else if hitpkt.Hits != blocks {
		//fmt.Printf("|******blocks:%d::hits:%d--", blocks, hitpkt.Hits)
		// Read from storage the blocks that were not found in the hit map
		var be_offset, be_block, be_blocks uint32
		var be_read_ready = false
		for block := uint32(0); block < blocks; block++ {
			if !hitpkt.Hitmap[int(block)] {
				if be_read_ready {
					be_blocks++
				} else {
					be_read_ready = true
					be_offset = offset + block
					be_block = block
					be_blocks++
				}
			} else {
				if be_read_ready {
					// Send read
					msgs++
					go readandstore(fp, c, devid,
						be_offset, be_blocks,
						cache.SubBlockBuffer(buffer, 4*KB, be_block, be_blocks),
						here)
					be_read_ready = false
					be_blocks = 0
					be_offset = 0
					be_block = 0
				}
			}
		}
		if be_read_ready {
			msgs++
			go readandstore(fp, c, devid,
				be_offset, be_blocks,
				cache.SubBlockBuffer(buffer, 4*KB, be_block, be_blocks),
				here)
		}
	} else {
		msgs = 1
	}

	// Wait for blocks to be returned
	for msg := range here {
		msgs--
		godbc.Check(msg.Err == nil, msg)
		godbc.Check(msgs >= 0, msgs)
		if msgs == 0 {
			return
		}
	}
}
func (c *Log) get(msg *message.Message) error {

	var n int
	var err error

	defer msg.Done()
	iopkt := msg.IoPkt()

	var readmsg *message.Message
	var readmsg_block uint32
	for block := uint32(0); block < iopkt.Blocks; block++ {

		ramhit := false
		index := iopkt.LogBlock + block
		offset := c.offset(index)

		// Check if the data is in RAM.  Go through each buffered segment
		for i := 0; i < c.segmentbuffers; i++ {

			c.segments[i].lock.RLock()
			if c.inRange(index, &c.segments[i]) {

				ramhit = true
				n, err = c.segments[i].data.ReadAt(
					SubBlockBuffer(iopkt.Buffer, c.blocksize, block, 1),
					offset-c.segments[i].offset)

				godbc.Check(err == nil, err, block, offset, i)
				godbc.Check(uint32(n) == c.blocksize)
				c.stats.RamHit()
			}
			c.segments[i].lock.RUnlock()
		}

		// We did not find it in ram, let's start making a message
		if !ramhit {
			if readmsg == nil {
				readmsg = message.NewMsgGet()
				msg.Add(readmsg)
				io := readmsg.IoPkt()
				io.LogBlock = index
				io.Blocks = 1
				readmsg_block = block
			} else {
				readmsg.IoPkt().Blocks++
			}

			io := readmsg.IoPkt()
			io.Buffer = SubBlockBuffer(iopkt.Buffer,
				c.blocksize,
				readmsg_block,
				io.Blocks)

		} else if readmsg != nil {
			// We have a pending message, but the
			// buffer block was not contiguous.
			c.logreaders <- readmsg
			readmsg = nil
		}
	}

	// Send pending read
	if readmsg != nil {
		c.logreaders <- readmsg
	}

	return nil
}
// Use as a goroutine to start the io workload.
// Create one of these per context set on Spc1Init()
func (s *SpcInfo) Context(wg *sync.WaitGroup,
	iotime chan<- *IoStats,
	quit <-chan struct{},
	runlen, context int) {

	defer wg.Done()

	// The spc generator specifies that each context has
	// 8 io streams, and it will specify which io stream to use.
	streams := 8
	iostreams := make([]chan *IoStats, streams)

	var iostreamwg sync.WaitGroup
	for stream := 0; stream < streams; stream++ {

		// Allow for queued requests
		iostreams[stream] = make(chan *IoStats, 64)

		// Create 32 io contexts per stream
		for i := 0; i < 32; i++ {
			iostreamwg.Add(1)
			go s.sendio(&iostreamwg, iostreams[stream], iotime)
		}
	}

	start := time.Now()
	lastiotime := start
	stop := time.After(time.Second * time.Duration(runlen))
	ioloop := true
	for ioloop {
		select {
		case <-quit:
			ioloop = false
		case <-stop:
			ioloop = false
		default:
			// Get the next io
			io := spc1.NewSpc1Io(context)

			err := io.Generate()
			godbc.Check(err == nil)
			godbc.Invariant(io)

			// Check how much time we should wait
			sleep_time := start.Add(io.When).Sub(lastiotime)
			if sleep_time > 0 {
				time.Sleep(sleep_time)
			}

			// Send io to io stream
			iostreams[io.Stream] <- &IoStats{
				Io:    io,
				Start: time.Now(),
			}

			lastiotime = time.Now()
		}
	}

	// Close the streams for this context
	for stream := 0; stream < streams; stream++ {
		close(iostreams[stream])
	}
	iostreamwg.Wait()
}
func NewLog(logfile string, blocksize, blocks_per_segment, bcsize uint32,
	usedirectio bool) (*Log, uint32, error) {

	var err error

	// Initialize Log
	log := &Log{}
	log.stats = &logstats{}
	log.blocksize = blocksize
	log.blocks_per_segment = blocks_per_segment
	log.segmentsize = log.blocks_per_segment * log.blocksize

	// For DirectIO
	if usedirectio {
		log.fp, err = openFile(logfile, OSSYNC|os.O_RDWR|os.O_EXCL, os.ModePerm)
	} else {
		log.fp, err = openFile(logfile, os.O_RDWR|os.O_EXCL, os.ModePerm)
	}
	if err != nil {
		return nil, 0, err
	}

	// Determine cache size
	var size int64
	size, err = log.fp.Seek(0, os.SEEK_END)
	if err != nil {
		return nil, 0, err
	}
	if size == 0 {
		return nil, 0, ErrLogTooSmall
	}
	blocks := size / int64(blocksize)
	if logMaxBlocks <= blocks {
		return nil, 0, ErrLogTooLarge
	}

	// We have to make sure that the number of blocks requested
	// fit into the segments tracked by the log
	log.numsegments = uint32(blocks) / log.blocks_per_segment
	log.size = uint64(log.numsegments) * uint64(log.segmentsize)

	// Maximum number of blocks aligned to segments
	log.blocks = log.numsegments * log.blocks_per_segment

	// Adjust the number of segment buffers
	if log.numsegments < NumberSegmentBuffers {
		log.segmentbuffers = int(log.numsegments)
	} else {
		log.segmentbuffers = NumberSegmentBuffers
	}

	godbc.Check(log.numsegments != 0,
		fmt.Sprintf("bs:%v ssize:%v sbuffers:%v blocks:%v max:%v ns:%v size:%v\n",
			log.blocksize, log.segmentsize, log.segmentbuffers, log.blocks,
			log.blocks_per_segment, log.numsegments, log.size))

	// Incoming message channel
	log.Msgchan = make(chan *message.Message, 32)
	log.quitchan = make(chan struct{})
	log.logreaders = make(chan *message.Message, 32)

	// Segment channel state machine:
	//   -> Client writes available segment
	//   -> Segment written to storage
	//   -> Segment read from storage
	//   -> Segment available
	log.chwriting = make(chan *IoSegment, log.segmentbuffers)
	log.chavailable = make(chan *IoSegment, log.segmentbuffers)
	log.chreader = make(chan *IoSegment, log.segmentbuffers)

	// Set up each of the segments
	log.segments = make([]IoSegment, log.segmentbuffers)
	for i := 0; i < log.segmentbuffers; i++ {
		log.segments[i].segmentbuf = make([]byte, log.segmentsize)
		log.segments[i].data = bufferio.NewBufferIO(log.segments[i].segmentbuf)

		// Fill chreader with all the available buffers
		log.chreader <- &log.segments[i]
	}

	godbc.Ensure(log.size != 0)
	godbc.Ensure(log.blocksize == blocksize)
	godbc.Ensure(log.Msgchan != nil)
	godbc.Ensure(log.chwriting != nil)
	godbc.Ensure(log.chavailable != nil)
	godbc.Ensure(log.chreader != nil)
	godbc.Ensure(log.segmentbuffers == len(log.segments))
	godbc.Ensure(log.segmentbuffers == len(log.chreader))
	godbc.Ensure(0 == len(log.chavailable))
	godbc.Ensure(0 == len(log.chwriting))

	// Return the log object to the caller.
	// Also return the maximum number of blocks, which may
	// be different from what the caller asked for.  The log
	// will make sure that the maximum number of blocks
	// are contained per segment
	return log, log.blocks, nil
}