func (c *segmentCollection) addSegment(newseg *segment) {
	// add at the end
	for _, seg := range c.end {
		if seg.positionEnd <= newseg.positionStart && newseg.tokens.IsOverlapsing(seg.tokens) {
			log.Debug("Added segment %s overlaps %s at end. Marking overlapped segment as not writable!", newseg, seg)
			seg.writable = false
			seg.nextSegments = append(seg.nextSegments, newseg)
			newseg.prevSegments = append(newseg.prevSegments, seg)
		}
	}
	c.end = append(c.end, newseg)
	c.cleanup(true)

	// add at beginning
	for _, seg := range c.beginning {
		if newseg.positionEnd <= seg.positionStart && newseg.tokens.IsOverlapsing(seg.tokens) {
			log.Debug("Added segment %s overlaps %s at beginning.", newseg, seg)
			seg.prevSegments = append(seg.prevSegments, newseg)
			newseg.nextSegments = append(newseg.nextSegments, seg)
		}
	}
	c.beginning = append(c.beginning, newseg)
	c.cleanup(false)
}
func (m *segmentManager) getWritableSegment(token Token) *segment {
	if !m.tokens.IsWithin(token) {
		log.Fatal("Got a token not within range: got %d, range from %d to %d", token, m.tokens.from, m.tokens.to)
	}

	seg := m.timeline.getEndSegment(token)

	// no writable segment found, create one
	if seg == nil || !seg.writable {
		if seg == nil {
			log.Debug("Couldn't find a segment for token %d", token)
		} else {
			log.Debug("Segment for token %d is not writable", token)
		}

		// find the right chunk for this token
		chunkLength := int(math.Ceil(float64(m.tokens.Length()) / SEG_CHUNKING))
		found := false
		chunk := m.tokens
		for !found {
			to := int(chunk.from) + chunkLength
			if to > int(m.tokens.to) { // prevent overflow
				to = int(m.tokens.to)
			}

			chunk = TokenRange{chunk.from, Token(to)}
			if chunk.IsWithin(token) {
				found = true
			} else {
				chunk = TokenRange{chunk.to + 1, m.tokens.to}
			}
		}

		pos := uint64(0)
		if seg != nil {
			pos = seg.positionEnd // TODO: THIS IS NOT GOOD! IT SHOULD TAKE THE BIGGEST END POSITION OF ALL OVERRIDDEN SEGMENTS
		}

		log.Info("Creating a new segment for tokens %d to %d @ %d", chunk.from, chunk.to, pos)
		seg = createSegment(m.dataDir, chunk.from, chunk.to, pos)
		m.timeline.addSegment(seg)

		// find an id, assign it to the segment
		for m.segments[m.nextSegId] != nil {
			m.nextSegId++
		}
		seg.id = m.nextSegId
		m.segments[seg.id] = seg
		m.nextSegId++
	}

	return seg
}
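// Worked example of the chunk walk above (hypothetical values, assuming the
// manager covers tokens 0 to 999, Length() reports 1000, and SEG_CHUNKING is 4):
// chunkLength = ceil(1000 / 4) = 250, so the loop probes the ranges [0, 250],
// [251, 501], [502, 752] and finally [753, 999] (capped at the upper bound),
// stopping at the first range that contains the requested token.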
func (api *api) head(resp *rest.ResponseWriter, req *rest.Request, path *Path) {
	log.Debug("FSS API: Received a head request for path %s\n", path)

	header, err := api.fss.HeaderJSON(path, nil)
	if err != nil {
		log.Error("API: Fs header returned an error: %s\n", err)
		resp.ReturnError(err.String())
		return
	}

	resp.Write(header)
	log.Debug("API: Fs Header data returned\n")
}
func (api *api) get(resp *rest.ResponseWriter, req *rest.Request, path *Path) {
	log.Debug("FSS API: Received a read request for path %s\n", path)

	// TODO: Handle offset
	// TODO: Handle version
	// TODO: Handle size
	_, err := api.fss.Read(path, 0, -1, 0, resp, nil)
	log.Debug("API: Fs Read data returned\n")

	if err != nil && err != os.EOF {
		log.Error("API: Fs Read returned an error for %s: %s\n", path, err)
		resp.ReturnError(err.String())
	}
}
func (fss *FsService) RemoteDeleteReplica(message *comm.Message) {
	// read payload
	str, _ := message.Message.ReadString()
	path := NewPath(str) // path
	version, _ := message.Message.ReadInt64() // version

	log.Debug("%d FSS: Received sync delete replica for path '%s' version '%d'\n", fss.cluster.MyNode.Id, path, version)

	// Get the header
	localheader := fss.headers.GetFileHeader(path)

	// Delete the file locally
	file := OpenFile(fss, localheader, version)
	file.Delete()

	// Delete in the header
	localheader.header.Exists = false
	localheader.header.ClearChildren()
	localheader.header.Size = 0
	localheader.Save()

	// TODO: add to garbage collector

	// Send an acknowledgement
	msg := fss.comm.NewMsgMessage(fss.serviceId)
	fss.comm.RespondSource(message, msg)
}
func (fss *FsService) RemoteExists(message *comm.Message) {
	// Read payload
	pathString, _ := message.Message.ReadString()
	path := NewPath(pathString) // path
	forceLocal, _ := message.Message.ReadBool() // force local

	log.Debug("%d: FSS: Received new exists message for path %s\n", fss.cluster.MyNode.Id, path)

	result := fss.ring.Resolve(path.String())

	// get header, check if it exists or has been handed off
	localheader := fss.headers.GetFileHeader(path)

	// if file exists locally or it's been handed off
	if localheader.header.Exists {
		response := fss.comm.NewMsgMessage(fss.serviceId)
		response.Message.WriteBool(true)
		fss.comm.RespondSource(message, response)
	} else {
		// if I'm the master or we force local
		if result.IsFirst(fss.cluster.MyNode) || forceLocal {
			response := fss.comm.NewMsgMessage(fss.serviceId)
			response.Message.WriteBool(false)
			fss.comm.RespondSource(message, response)
		} else {
			fss.comm.RedirectFirst(result, message)
		}
	}
}
func (fss *FsService) RemoteReplicaVersion(message *comm.Message) {
	// Read payload
	str, _ := message.Message.ReadString() // path
	path := NewPath(str)
	version, _ := message.Message.ReadInt64() // current version
	nextversion, _ := message.Message.ReadInt64() // next version
	size, _ := message.Message.ReadInt64() // size
	mimetype, _ := message.Message.ReadString() // mimetype

	log.Debug("%d FSS: Received sync version replica for path '%s'\n", fss.cluster.MyNode.Id, path)

	// Get the header
	localheader := fss.headers.GetFileHeader(path)

	// Update the header
	localheader.header.Path = path.String()
	localheader.header.Name = path.BaseName()
	localheader.header.Exists = true
	localheader.header.Version = version
	localheader.header.NextVersion = nextversion
	localheader.header.MimeType = mimetype
	localheader.header.Size = size
	localheader.Save()

	// enqueue replication for background download
	fss.replicationEnqueue(path)

	// Send an acknowledgement
	req := fss.comm.NewMsgMessage(fss.serviceId)
	fss.comm.RespondSource(message, req)
}
func (cs *ClusterService) Boot() {
	myNode := cs.cluster.MyNode
	log.Debug("%d: Booting cluster service", myNode.Id)

	cs.loadCluster()

	// Contact master or listen for incoming requests if I'm master
	masters := cs.cluster.Rings.GetRing(cs.masterRing).ResolveToken(master_token)
	if masters.IsFirst(myNode) {
		// TODO: We should contact secondary master first!
		myNode.Status = cluster.Status_Online
		cs.state = state_online
		myNode.Adhoc = false
	} else {
		myNode.Status = cluster.Status_Offline
		cs.state = state_offline

		// switch to adhoc, we are not yet in the cluster
		myNode.Adhoc = true

		// contact master
		cs.ContactMaster()
	}
}
func (cs *ClusterService) RemoteContactMaster(msg *comm.Message) {
	myNode := cs.cluster.MyNode
	log.Debug("%d: Got a ContactMaster request: %s", myNode.Id, msg)

	if cs.state == state_online {
		masters := cs.cluster.Rings.GetRing(cs.masterRing).ResolveToken(master_token)

		// make sure I'm the master, and online
		if myNode.Status == cluster.Status_Online && masters.IsFirst(myNode) {
			node := cluster.NewEmptyNode()
			err := node.Unserialize(msg.Message)
			if err != nil {
				cs.comm.RespondError(msg, os.NewError("Couldn't unmarshal node data"))
				log.Error("Couldn't unmarshal node data: %s", err)
				return
			}

			node.Status = cluster.Status_Online
			cs.cluster.MergeNode(node, true)

			// TODO: Send the cluster back to the node
			resp := cs.comm.NewMsgMessage(cs.serviceId)
			cs.comm.RespondSource(msg, resp)

			// TODO: LOCK SO THAT WE DON'T MAKE IT ONLINE TWICE
			// TODO: Accept the node
			// TODO: Check its rings
			// TODO: Broadcast the change
		} else {
			cs.comm.RedirectFirst(masters, msg)
		}
	}
}
func (fss *FsService) RemoteHeader(message *comm.Message) {
	// read payload
	str, _ := message.Message.ReadString()
	path := NewPath(str)

	log.Debug("FSS: Received new need header message for path %s\n", path)

	result := fss.ring.Resolve(path.String())

	// TODO: When versioning will be added, should not be that way since we may have it, but not of the right version
	if result.InOnlineNodes(fss.cluster.MyNode) {
		// If file exists locally or I'm the master
		localheader := fss.headers.GetFileHeader(path)
		if localheader.header.Exists || result.IsFirst(fss.cluster.MyNode) {
			// respond data
			response := fss.comm.NewDataMessage(fss.serviceId)
			header := localheader.header.ToJSON()
			response.DataSize = int64(len(header))
			response.Data = bytes.NewBuffer(header)
			fss.comm.RespondSource(message, response)
		} else {
			fss.comm.RedirectFirst(result, message)
		}
	} else {
		fss.comm.RedirectOne(result, message)
	}
}
func (api *api) post(resp *rest.ResponseWriter, req *rest.Request, path *Path) {
	log.Debug("FSS API: Received a write request for %d bytes\n", req.ContentLength)

	mimetype := "application/octet-stream"
	mtar, ok := req.Params["type"]
	if ok {
		mimetype = mtar[0]
	}

	err := api.fss.Write(path, req.ContentLength, mimetype, req.Body, nil)
	if err != nil {
		log.Error("API: Fs Write returned an error: %s\n", err)
		resp.ReturnError(err.String())
	}

	log.Debug("API: Fs Write returned\n")
}
//
// Removes all not-writable segments that aren't covering any part of the range anymore
//
func (c *segmentCollection) cleanup(end bool) {
	newSegs := make([]*segment, 0)

	var toClean []*segment
	if end {
		toClean = c.end
	} else {
		toClean = c.beginning
	}

	for o := 0; o < len(toClean); o++ {
		oSeg := toClean[o]

		// if the segment is not writable OR we are cleaning the beginning
		if !oSeg.writable || !end {
			covRange := oSeg.tokens
			useless := false

			// iterate on all segments, remove ranges covered by other segments from covRange
			for i := 0; i < len(toClean) && !useless; i++ {
				iSeg := toClean[i]

				// it's not the segment we are iterating on in the outer loop + it's after or before (depending if we clean the end or beginning)
				if (i != o) && ((end && iSeg.positionStart >= oSeg.positionEnd) || (!end && iSeg.positionEnd <= oSeg.positionStart)) {
					if iSeg.tokens.IsWithin(covRange.from) {
						covRange = TokenRange{iSeg.tokens.to, covRange.to}
					}
					if iSeg.tokens.IsWithin(covRange.to) {
						covRange = TokenRange{covRange.from, iSeg.tokens.from}
					}
				}

				// as soon as the coverage range is <= 0, we know that this segment is useless
				if covRange.Length() <= 0 {
					useless = true
					log.Debug("Segment %s now useless at end=%v. Removing it.", oSeg, end)
				}
			}

			// only add the segment to the new array if it isn't useless
			if !useless {
				newSegs = append(newSegs, oSeg)
			}
		} else if end {
			// if it's a writable segment, add it
			newSegs = append(newSegs, oSeg)
		}
	}

	if end {
		c.end = newSegs
	} else {
		c.beginning = newSegs
	}
}
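// Worked example of the coverage check above (hypothetical token values,
// assuming TokenRange.Length() measures to - from): say c.end holds a
// non-writable segment A over tokens [0, 100] plus two segments B over [0, 60]
// and C over [40, 100], both with positionStart >= A.positionEnd. While
// cleaning the end, B covers covRange.from and shrinks A's uncovered range to
// [60, 100]; C then covers covRange.to and shrinks it to [60, 40]. The range
// length is now <= 0, so A no longer covers anything exclusively and is
// dropped from c.end.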
func (s *Server) start() {
	var err os.Error

	log.Debug("ServiceServer: starting listening tcp socket on %s\n", s.tcpaddr)
	s.tcpsock, err = net.ListenTCP("tcp", s.tcpaddr)
	if err != nil {
		log.Fatal("Couldn't create TCP server listener: %s\n", err)
	}
	go s.acceptTCP()

	log.Debug("ServiceServer: starting listening udp socket on %s\n", s.udpaddr)
	s.udpsock, err = net.ListenUDP("udp", s.udpaddr)
	if err != nil {
		log.Fatal("Couldn't create UDP server listener: %s\n", err)
	}
	go s.acceptUDP()
}
// Returns an API Server
func NewServer(handler Handler, adr string) *Server {
	server := new(Server)
	server.handler = handler
	server.servmux = http.NewServeMux()

	con, err := net.Listen("tcp", adr)
	if err != nil {
		log.Error("API: Couldn't create listener socket: %s\n", err)
	}

	// Add handling function at root, delegating everything to the handler
	server.servmux.HandleFunc("/", func(httpresp http.ResponseWriter, httpreq *http.Request) {
		log.Debug("API: Connection on url %s\n", httpreq.URL)

		resp := &ResponseWriter{httpresp}

		var req *Request
		if req = NewRequest(resp, httpreq); req == nil {
			log.Error("API: Couldn't create request object")
			return
		}

		handler.Handle(resp, req)

		// TODO: Remove that! Shouldn't be here!!
		// Read the rest, so we don't cause a Broken Pipe on the other end if we don't read to the end
		ioutil.ReadAll(req.Body)
	})

	// Start serving the API on another thread
	go func() {
		log.Debug("API: Starting API server on adr %s\n", adr)
		err = http.Serve(con, server.servmux)
		con.Close()
		if err != nil {
			log.Fatal("API: Serve error: ", err.String())
		}
	}()

	return server
}
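// A minimal usage sketch of this constructor (hypothetical: pingHandler,
// startPingAPI and the listen address are illustrative only, and Handler is
// assumed to be the single-method interface implied by the handler.Handle(resp,
// req) call above):
//
//	type pingHandler struct{}
//
//	// Handle answers every request with a static body.
//	func (h *pingHandler) Handle(resp *ResponseWriter, req *Request) {
//		resp.Write([]byte("pong"))
//	}
//
//	func startPingAPI() *Server {
//		return NewServer(&pingHandler{}, "127.0.0.1:8080")
//	}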
func (fss *FsService) RemoteChildrenList(message *comm.Message) {
	str, _ := message.Message.ReadString()
	forceLocal, _ := message.Message.ReadBool()
	path := NewPath(str)

	log.Debug("FSS: Received message to list children for %s\n", path)

	result := fss.ring.Resolve(path.String())

	// I'm one of the nodes
	if result.InOnlineNodes(fss.cluster.MyNode) {
		localheader := fss.headers.GetFileHeader(path)

		// we have the header locally
		if localheader.header.Exists {
			children := localheader.header.Children

			// Create the message to send back
			var count uint16 = uint16(len(children))
			response := fss.comm.NewMsgMessage(fss.serviceId)

			// Write child count
			response.Message.WriteUint16(count) // children count

			for _, child := range children {
				response.Message.WriteString(child.Name) // name
				response.Message.WriteString(child.MimeType) // type
				response.Message.WriteInt64(child.Size) // size
			}

			fss.comm.RespondSource(message, response)
		} else {
			// we don't have the header, we redirect to the appropriate node
			// if I'm the master or forced local
			if result.IsFirst(fss.cluster.MyNode) || forceLocal {
				fss.comm.RespondError(message, ErrorFileNotFound)
			} else {
				fss.comm.RedirectFirst(result, message)
			}
		}
	} else {
		// I'm not one of the nodes
		// if it was forced local
		if forceLocal {
			fss.comm.RespondError(message, ErrorFileNotFound)
		} else {
			fss.comm.RedirectOne(result, message)
		}
	}
}
func (fss *FsService) sendToReplicaNode(resolv *cluster.ResolveResult, req_cb func(node *cluster.Node) *comm.Message) chan os.Error {
	toSyncCount := resolv.Count() - 1 // minus one for the master
	var syncError os.Error = nil
	myNodeId := fss.cluster.MyNode.Id

	errChan := make(chan os.Error, 1) // channel used to return the result to the caller
	c := make(chan bool, toSyncCount) // channel used to wait for all replicas

	if toSyncCount > 0 {
		go func() {
			for i := 0; i < resolv.Count(); i++ {
				node := resolv.Get(i)

				if node.Status == cluster.Status_Online && node.Id != myNodeId {
					// get the new message
					req := req_cb(node)
					req.Timeout = 1000 // TODO: Config

					req.OnResponse = func(message *comm.Message) {
						log.Debug("%d: FSS: Received acknowledge message for message %s\n", fss.cluster.MyNode.Id, req)
						c <- true
					}

					req.OnTimeout = func(last bool) (retry bool, handled bool) {
						// TODO: Retry it!
						syncError = comm.ErrorTimeout
						log.Error("%d: FSS: Couldn't send message to replica node %s because of a timeout for message %s\n", fss.cluster.MyNode.Id, node, req)
						c <- true
						return true, false
					}

					req.OnError = func(message *comm.Message, err os.Error) {
						syncError = err
						log.Error("%d: FSS: Received an error while sending message %s to replica %s: %s\n", fss.cluster.MyNode.Id, req, node, err)
						c <- true
					}

					fss.comm.SendNode(node, req)
				}
			}

			// wait for nodes to sync the handoff
			for i := 0; i < toSyncCount; i++ {
				<-c
			}

			errChan <- syncError
		}()
	} else {
		errChan <- nil
	}

	return errChan
}
func (api *api) delete(resp *rest.ResponseWriter, req *rest.Request, path *Path) {
	log.Debug("FSS API: Received a delete request for %s\n", path)

	recursive := false
	mrec, ok := req.Params["recursive"]
	if ok {
		recursive = (mrec[0] == "1") || (mrec[0] == "true")
	}

	err := api.fss.Delete(path, recursive, nil)
	if err != nil {
		log.Error("API: Fs Delete returned an error: %s\n", err)
		resp.ReturnError(err.String())
	}
}
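// Taken together, the head, get, post and delete handlers above form the HTTP
// surface of the file system service (a summary of what the code shows, not a
// full spec, assuming each handler is dispatched from the HTTP verb of the
// same name): HEAD returns the JSON header of a path, GET streams its content,
// POST writes the request body using the optional "type" query parameter as
// the MIME type (defaulting to application/octet-stream), and DELETE removes
// the path, descending into children when the "recursive" parameter is "1" or
// "true".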
func (fss *FsService) RemoteChildRemove(message *comm.Message) {
	str, _ := message.Message.ReadString() // path
	path := NewPath(str)
	child, _ := message.Message.ReadString() // child

	log.Debug("FSS: Received message to remove the child %s from %s\n", child, path)

	// resolve path
	mynode := fss.cluster.MyNode
	resolv := fss.ring.Resolve(path.String())

	// only the master has the lock
	if resolv.IsFirst(mynode) {
		fss.Lock(path.String())
	}

	localheader := fss.headers.GetFileHeader(path)
	localheader.header.RemoveChild(child)

	if resolv.IsFirst(mynode) {
		// replicate to nodes
		syncChan := fss.sendToReplicaNode(resolv, func(node *cluster.Node) *comm.Message {
			msg := fss.comm.NewMsgMessage(fss.serviceId)
			msg.Function = "RemoteChildRemove"
			msg.Message.WriteString(path.String()) // path
			msg.Message.WriteString(child) // child name
			return msg
		})

		// wait for replicas sync
		syncError := <-syncChan

		// check for sync error
		if syncError != nil {
			log.Error("FSS: Couldn't replicate remove child to nodes: %s\n", syncError)
			fss.comm.RespondError(message, os.NewError("Couldn't replicate remove child to all nodes"))
		}

		// unlock
		fss.Unlock(path.String())
	}

	// Send an acknowledgement
	msg := fss.comm.NewMsgMessage(fss.serviceId)
	fss.comm.RespondSource(message, msg)
}
func TestPersistence(t *testing.T) {
	log.Debug("TestPersistence")
	SetupCluster()

	// start master
	StartNode(0)

	// start another node
	StartNode(1)
	WaitOnline(1, 10)

	StopNode(1)
	StartNode(1)
}
func (fss *FsService) RemoteRead(message *comm.Message) {
	// Read payload
	str, _ := message.Message.ReadString() // path
	path := NewPath(str)
	offset, _ := message.Message.ReadInt64() // offset
	size, _ := message.Message.ReadInt64() // size
	version, _ := message.Message.ReadInt64() // version
	forceLocal, _ := message.Message.ReadBool() // force local

	log.Debug("FSS: Received new need read message for path %s, version %d, at offset %d, size of %d\n", path, version, offset, size)

	result := fss.ring.Resolve(path.String())

	// TODO: When versioning will be added, should not be that way since we may have it, but not of the right version
	if result.InOnlineNodes(fss.cluster.MyNode) || forceLocal {
		localheader := fss.headers.GetFileHeader(path)
		file := OpenFile(fss, localheader, version) // get the file

		// if the file exists and we have it locally
		if localheader.header.Exists && file.Exists() {
			// Send it back
			response := fss.comm.NewDataMessage(fss.serviceId)

			// TODO: Handle offset
			response.Message.WriteInt64(offset) // offset
			response.Message.WriteInt64(localheader.header.Version) // version

			// TODO: Handle the asked read size
			response.DataSize = localheader.header.Size
			response.Data = io.Reader(file)
			response.DataAutoClose = true

			fss.comm.RespondSource(message, response)
		} else {
			// Check if I'm supposed to have it
			if result.IsFirst(fss.cluster.MyNode) || forceLocal {
				fss.comm.RespondError(message, ErrorFileNotFound)
			} else {
				fss.comm.RedirectFirst(result, message)
			}
		}
	} else {
		fss.comm.RedirectOne(result, message)
	}
}
func (cs *ClusterService) ContactMaster() {
	myNode := cs.cluster.MyNode
	log.Debug("%d: Contacting master...", myNode.Id)

	msg := cs.comm.NewMsgMessage(cs.serviceId)
	msg.Function = "RemoteContactMaster"

	err := cs.cluster.MyNode.Serialize(msg.Message)
	if err != nil {
		log.Fatal("Couldn't marshal my node: %s", err)
	}

	msg.OnResponse = func(msg *comm.Message) {
		// TODO: Merge the cluster we got into our cluster
		myNode.Status = cluster.Status_Online
	}

	// send our node to master
	masters := cs.cluster.Rings.GetRing(cs.masterRing).Resolve(master_token)
	cs.comm.SendFirst(masters, msg)
}
func (cs *ClusterService) loadCluster() {
	cs.clusterMutex.Lock()

	// Load data
	log.Debug("cls: Loading cluster data...")
	stat, err := os.Stat(cs.clsDataPath)
	if err == nil && stat.IsRegular() {
		file, err := os.Open(cs.clsDataPath)
		if err == nil {
			typedFile := typedio.NewReader(file)

			cs.clusterVersion, _ = typedFile.ReadInt64() // cluster version
			cs.diskVerson = cs.clusterVersion

			nbNodes, _ := typedFile.ReadUint16() // nodes count

			var i uint16
			for i = 0; i < nbNodes; i++ {
				node := cluster.NewEmptyNode()
				node.Unserialize(typedFile)
				node.Status = cluster.Status_Offline
				cs.cluster.MergeNode(node, false) // merge node, doesn't notify
			}
		} else {
			log.Error("cls: Error while opening data file: %s", err)
		}
	}

	// replay commit log
	log.Info("cls: Replaying commit log...")
	cs.commitlog.Replay()

	// TODO: Load cluster data

	cs.clusterMutex.Unlock()
}
func TestBoot(t *testing.T) {
	log.Debug("TestBoot")
	SetupCluster()

	// start master
	StartNode(0)

	// start another node
	StartNode(1)

	err := WaitOnline(1, 10)
	if err != nil {
		t.Errorf("1) Got an error: %s", err)
	}

	for i, proc := range tc.nodes {
		if proc != nil {
			if proc.Cluster.Nodes.Get(1) == nil || proc.Cluster.Nodes.Get(1).Status != cluster.Status_Online {
				t.Errorf("2) Node 1 is not online for proc %d", i)
			}
		}
	}
}
func (comm *Comm) SendNode(node *cluster.Node, message *Message) {
	// resolve function and service names
	message.PrepareSend()

	if node.Equals(comm.Cluster.MyNode) {
		log.Debug("%d: Looping message (%s) locally\n", comm.Cluster.MyNode.Id, message)

		// if this message needs an acknowledgement, add it to the ack watcher
		if message.Timeout > 0 || message.OnError != nil || message.OnResponse != nil {
			comm.watchMessage(message, node)
		}

		message.SeekZero()
		comm.handleMessage(message)
		message.Release()
	} else {
		// watch message here, because getting a connection may timeout if node is
		// down and trying to open a TCP connection
		if message.Timeout > 0 || message.OnError != nil || message.OnResponse != nil {
			comm.watchMessage(message, node)
		}

		// TODO: Replace by a queue using channel so we don't create too many connections
		go func() {
			// We use TCP if message is more than 8000 bytes (maximum UDP packet size)
			var connection *Connection
			if message.Type == T_MSG && message.TotalSize() < MAX_MSG_SIZE {
				connection = comm.pool.GetMsgConnection(node)
			} else {
				connection = comm.pool.GetDataConnection(node)
			}

			if connection == nil {
				// Report as an error because the tracker should have reported the timeout if it's a timeout
				if message.OnError != nil {
					message.OnError(message, os.NewError("Couldn't get TCP connection to node"))
				} else {
					log.Error("Couldn't get a connection for message %s to %s\n", message, node)
				}
				return
			}

			log.Debug("%d: Sending message (%s) to %s via %s\n", comm.Cluster.MyNode.Id, message, node, connection)

			bufwriter := bufio.NewWriter(io.Writer(connection.gocon))
			buffer := io.Writer(bufwriter)

			message.SeekZero()
			err := message.writeMessage(buffer)
			if err != nil {
				log.Error("%d: Got an error writing message %s to socket for %s via %s: %s\n", comm.Cluster.MyNode.Id, message, node, connection, err)
			}

			err = bufwriter.Flush()
			if err != nil {
				log.Error("%d: Got an error sending message %s to %s via %s: %s\n", comm.Cluster.MyNode.Id, message, node, connection, err)
				if message.OnError != nil {
					message.OnError(message, err)
				}
			}

			// release the message and connection
			connection.Release()
			message.Release()
		}()
	}
}
/*
 * Children
 */

func (fss *FsService) RemoteChildAdd(message *comm.Message) {
	// read payload
	str, _ := message.Message.ReadString()
	path := NewPath(str) // path
	child, _ := message.Message.ReadString() // name
	mimetype, _ := message.Message.ReadString() // type
	size, _ := message.Message.ReadInt64() // size

	log.Debug("%d FSS: Received message to add new child '%s' to '%s' (size=%d, type=%s)\n", fss.cluster.MyNode.Id, child, path, size, mimetype)

	// resolve path
	mynode := fss.cluster.MyNode
	resolv := fss.ring.Resolve(path.String())

	// only the master has the lock
	if resolv.IsFirst(mynode) {
		fss.Lock(path.String())
	}

	// add the child to the header
	localheader := fss.headers.GetFileHeader(path)
	existed := localheader.header.Exists
	localheader.header.Exists = true
	localheader.header.AddChild(child, mimetype, size)
	localheader.Save()

	// if I'm the master, replicate to nodes and add ourself to our parent
	if resolv.IsFirst(mynode) {
		// unlock the path
		fss.Unlock(path.String())

		// cascade: add ourself to our parent (check Write, Delete if logic changes here)
		syncParent := make(chan os.Error, 1)
		go func() {
			parent := path.ParentPath()
			if !path.Equals(parent) {
				msg := fss.comm.NewMsgMessage(fss.serviceId)
				msg.Function = "RemoteChildAdd"
				msg.Timeout = 5000 // TODO: Config
				msg.Retries = 10
				msg.RetryDelay = 100
				msg.OnTimeout = func(last bool) (retry bool, handled bool) {
					if last {
						log.Error("%d FSS: Couldn't add %s to parent after 10 tries\n", fss.cluster.MyNode.Id, path)
					}
					return true, false
				}
				msg.LastTimeoutAsError = true
				msg.OnError = func(response *comm.Message, error os.Error) {
					syncParent <- error
				}
				msg.OnResponse = func(response *comm.Message) {
					syncParent <- nil
				}

				msg.Message.WriteString(parent.String()) // path
				msg.Message.WriteString(path.Parts[len(path.Parts)-1]) // name
				msg.Message.WriteString(mimetype) // type
				msg.Message.WriteInt64(size) // size

				parentResolve := fss.ring.Resolve(parent.String())
				fss.comm.SendFirst(parentResolve, msg)
			} else {
				syncParent <- nil
			}
		}()

		// replicate to nodes
		syncChan := fss.sendToReplicaNode(resolv, func(node *cluster.Node) *comm.Message {
			msg := fss.comm.NewMsgMessage(fss.serviceId)
			msg.Function = "RemoteChildAdd"
			msg.Message.WriteString(path.String()) // path
			msg.Message.WriteString(child) // name
			msg.Message.WriteString(mimetype) // type
			msg.Message.WriteInt64(size) // size
			return msg
		})

		// wait for replicas sync, check for sync error
		syncError := <-syncChan
		if syncError != nil {
			log.Error("FSS: Couldn't replicate add child to nodes: %s\n", syncError)
			fss.comm.RespondError(message, os.NewError(fmt.Sprintf("Couldn't replicate add child to nodes: %s\n", syncError)))
			// TODO: ROLLBACK!!
		}

		if !existed {
			parentError := <-syncParent
			if parentError != nil {
				log.Error("FSS: Couldn't add myself to parent: %s\n", parentError)
				fss.comm.RespondError(message, parentError)
				// TODO: ROLLBACK!!
			}
		}
	}

	// Send an acknowledgement
	msg := fss.comm.NewMsgMessage(fss.serviceId)
	fss.comm.RespondSource(message, msg)
}
func (fss *FsService) RemoteWrite(message *comm.Message) {
	// read payload
	str, _ := message.Message.ReadString() // path
	path := NewPath(str)
	mimetype, _ := message.Message.ReadString() // mimetype

	log.Debug("%d FSS: Received new write message for path %s and size of %d and type %s\n", fss.cluster.MyNode.Id, path, message.DataSize, mimetype)

	resolveResult := fss.ring.Resolve(path.String())
	if !resolveResult.IsFirst(fss.cluster.MyNode) {
		log.Error("FSS: Received write for which I'm not master: %s\n", message)
		fss.comm.RespondError(message, os.NewError(fmt.Sprintf("Cannot accept write, I'm not the master for %s", path)))
		return
	}

	// Write the data to a temporary file
	tempfile := fmt.Sprintf("%s/%d.%d.data", os.TempDir(), path.Hash(), time.Nanoseconds()) // TODO: Use config to get temp path
	fd, err := os.Create(tempfile)
	if err != nil {
		os.Remove(tempfile)
		log.Error("%d: FSS: Got an error while creating a temporary file (%s) for write of %s: %s", fss.cluster.MyNode.Id, tempfile, path, err)
		fss.comm.RespondError(message, os.NewError(fmt.Sprintf("Got an error while creating a temporary file: %s", err)))
		return
	}

	_, err = io.Copyn(fd, message.Data, message.DataSize)
	if err != nil && err != os.EOF {
		log.Error("%d: FSS: Got an error while writing to a temporary file (%s) for write of %s: %s", fss.cluster.MyNode.Id, tempfile, path, err)
		fss.comm.RespondError(message, os.NewError(fmt.Sprintf("Got an error while writing to a temporary file: %s", err)))
		fd.Close()
		os.Remove(tempfile)
		return
	}
	fd.Close()

	fss.Lock(path.String())

	localheader := fss.headers.GetFileHeader(path)
	version := localheader.header.NextVersion
	localheader.header.NextVersion++
	localheader.header.Path = path.String()
	localheader.header.Name = path.BaseName()
	localheader.header.MimeType = mimetype
	localheader.header.Size = message.DataSize
	localheader.header.Version = version
	localheader.header.Exists = true

	file := OpenFile(fss, localheader, version)
	os.Rename(tempfile, file.datapath)
	localheader.Save()

	fss.Unlock(path.String())

	// send to parent
	syncParent := make(chan os.Error, 1)
	go func() {
		parent := path.ParentPath()
		if !path.Equals(parent) {
			req := fss.comm.NewMsgMessage(fss.serviceId)
			req.Function = "RemoteChildAdd"
			req.Timeout = 5000 // TODO: Config
			req.Retries = 10
			req.RetryDelay = 100
			req.OnTimeout = func(last bool) (retry bool, handled bool) {
				if last {
					log.Error("%d FSS: Couldn't add %s to parent after 10 tries\n", fss.cluster.MyNode.Id, path)
				}
				return true, false
			}
			req.LastTimeoutAsError = true
			req.OnError = func(response *comm.Message, error os.Error) {
				syncParent <- error
			}
			req.OnResponse = func(response *comm.Message) {
				syncParent <- nil
			}

			req.Message.WriteString(parent.String()) // path
			req.Message.WriteString(path.Parts[len(path.Parts)-1]) // name
			req.Message.WriteString(mimetype) // type
			req.Message.WriteInt64(message.DataSize) // size

			parentResolve := fss.ring.Resolve(parent.String())
			fss.comm.SendFirst(parentResolve, req)
		} else {
			syncParent <- nil
		}
	}()

	// send new header to all replicas
	syncReplica := fss.sendToReplicaNode(resolveResult, func(node *cluster.Node) *comm.Message {
		req := fss.comm.NewMsgMessage(fss.serviceId)
		req.Function = "RemoteReplicaVersion"
		req.Message.WriteString(path.String()) // path
		req.Message.WriteInt64(localheader.header.Version) // current version
		req.Message.WriteInt64(localheader.header.NextVersion) // next version
		req.Message.WriteInt64(localheader.header.Size) // size
		req.Message.WriteString(localheader.header.MimeType) // mimetype
		return req
	})

	replicaError := <-syncReplica
	if replicaError != nil {
		log.Error("FSS: Couldn't replicate header to nodes: %s\n", replicaError)
		fss.comm.RespondError(message, replicaError)
		return // TODO: ROLLBACK!!
	}

	parentError := <-syncParent
	if parentError != nil {
		log.Error("FSS: Couldn't add myself to parent: %s\n", parentError)
		fss.comm.RespondError(message, parentError)
		return // TODO: ROLLBACK!!
	}

	// confirm
	log.Debug("%d FSS: Sending write confirmation for path %s message %s\n", fss.cluster.MyNode.Id, path, message)
	response := fss.comm.NewMsgMessage(fss.serviceId)
	fss.comm.RespondSource(message, response)
}
func (fss *FsService) RemoteDelete(message *comm.Message) {
	strPath, _ := message.Message.ReadString() // path
	path := NewPath(strPath)
	recursive, _ := message.Message.ReadBool() // recursive flag
	first, _ := message.Message.ReadBool() // first level flag

	log.Debug("%d FSS: Received a new delete message for path=%s recursive=%v\n", fss.cluster.MyNode.Id, path, recursive)

	resolveResult := fss.ring.Resolve(path.String())
	if resolveResult.IsFirst(fss.cluster.MyNode) {
		localheader := fss.headers.GetFileHeader(path)

		if !localheader.header.Exists {
			fss.comm.RespondError(message, ErrorFileNotFound)
			return
		} else {
			children := localheader.header.Children

			// if there are no children
			if len(children) == 0 {
				fss.Lock(path.String())

				localheader.header.Exists = false
				localheader.header.ClearChildren()
				localheader.header.Size = 0
				localheader.Save()

				// sync replicas
				syncChan := fss.sendToReplicaNode(resolveResult, func(node *cluster.Node) *comm.Message {
					msg := fss.comm.NewMsgMessage(fss.serviceId)
					msg.Function = "RemoteDeleteReplica"
					msg.Message.WriteString(path.String()) // path
					msg.Message.WriteInt64(localheader.header.Version) // version
					return msg
				})

				// wait for sync
				syncErr := <-syncChan
				if syncErr != nil {
					log.Error("FSS: Couldn't delete replica from nodes: %s\n", syncErr)
				}

				fss.Unlock(path.String())
			} else {
				// if there are children
				if recursive {
					// Lock the file
					fss.Lock(path.String())

					// Send delete to children
					c := make(chan int, 1)
					for _, child := range children {
						try := 0

						var deletechild func()
						deletechild = func() {
							childpath := path.ChildPath(child.Name)

							msg := fss.comm.NewMsgMessage(fss.serviceId)
							msg.Function = "RemoteDelete"

							// write payload
							msg.Message.WriteString(childpath.String()) // path
							msg.Message.WriteBool(recursive) // recursive = true here
							msg.Message.WriteBool(false) // not first here

							childres := fss.ring.Resolve(childpath.String())

							msg.Timeout = 1000 // TODO: Config
							msg.OnTimeout = func(last bool) (retry bool, handled bool) {
								if try < 10 {
									try++
									deletechild()
								} else {
									log.Error("FSS: Couldn't delete child=%s of path=%s after 10 tries\n", child, path)
									c <- 1
								}
								return true, false
							}
							msg.OnResponse = func(message *comm.Message) {
								c <- 1
							}

							fss.comm.SendNode(childres.GetFirst(), msg)
						}

						deletechild()
						<-c
					}

					// delete the file locally
					file := OpenFile(fss, localheader, 0)
					file.Delete()

					// delete in the header
					localheader.header.ClearChildren()
					localheader.header.Exists = false
					localheader.header.Size = 0
					localheader.Save()

					// sync replicas
					syncChan := fss.sendToReplicaNode(resolveResult, func(node *cluster.Node) *comm.Message {
						msg := fss.comm.NewMsgMessage(fss.serviceId)
						msg.Function = "RemoteDeleteReplica"
						msg.Message.WriteString(path.String()) // path
						msg.Message.WriteInt64(localheader.header.Version) // version
						return msg
					})

					// wait for sync
					syncErr := <-syncChan
					if syncErr != nil {
						log.Error("FSS: Couldn't delete replica from nodes: %s\n", syncErr)
						fss.comm.RespondError(message, os.NewError(fmt.Sprintf("Couldn't replicate %s to nodes: %s", path.String(), syncErr)))
						return
					}

					fss.Unlock(path.String())
				} else {
					// non recursive flag, I have children so respond error
					fss.comm.RespondError(message, ErrorNotEmpty)
					return
				}
			}

			// Remove the child from the parent (check ChildAdd, Write if logic changes here)
			if first {
				try := 0

				var deleteparent func()
				deleteparent = func() {
					parent := path.ParentPath()
					if !path.Equals(parent) {
						msg := fss.comm.NewMsgMessage(fss.serviceId)
						msg.Function = "RemoteChildRemove"
						msg.LastTimeoutAsError = false

						msg.Message.WriteString(parent.String()) // parent path
						msg.Message.WriteString(path.Parts[len(path.Parts)-1]) // name

						msg.Timeout = 1000 // TODO: Config
						msg.OnTimeout = func(last bool) (retry bool, handled bool) {
							if try < 10 {
								try++
								deleteparent()
							} else {
								log.Error("FSS: Couldn't delete path=%s from parent after 10 tries\n", path)
								fss.comm.RespondError(message, os.NewError(fmt.Sprintf("Couldn't delete %s from parent after 10 tries.", path.String())))
							}
							return true, false
						}

						parentResolve := fss.ring.Resolve(parent.String())
						fss.comm.SendNode(parentResolve.GetFirst(), msg)
					}
				}
				go deleteparent()

				// Send a confirmation
				result := fss.comm.NewMsgMessage(fss.serviceId)
				fss.comm.RespondSource(message, result)
			} else {
				// I'm an underneath child and was asked to delete by my parent, simply acknowledge it
				// Send an acknowledgement
				msg := fss.comm.NewMsgMessage(fss.serviceId)
				fss.comm.RespondSource(message, msg)
			}
		}
	} else {
		// not master, send it to master
		fss.comm.RedirectFirst(resolveResult, message)
	}
}
func NewProcess(config gostore.Config) *Process {
	proc := new(Process)
	proc.Config = config

	// Create the cluster instance
	proc.Cluster = cluster.NewCluster(proc.Config)
	nodes := proc.Cluster.Nodes

	// Validate rings
	if len(config.Rings) == 0 {
		log.Fatal("CONFIG: At least one ring must be configured (at least the global)")
	}

	// Generate active nodes
	for _, confnode := range config.Nodes {
		acnode := cluster.NewNode(confnode.NodeId, net.ParseIP(confnode.NodeIP), confnode.TCPPort, confnode.UDPPort)
		for _, confring := range confnode.Rings {
			acnode.AddRing(confring.RingId, confring.Token)
		}
		nodes.Add(acnode)
	}

	// My node
	mynode := nodes.Get(config.CurrentNode)
	proc.Cluster.SetMyNode(mynode)

	// Fill all rings
	proc.Cluster.FillRings()

	// Create services server
	proc.Sc = comm.NewComm(proc.Cluster)

	var oneCls bool
	for _, sconfig := range config.Services {
		switch sconfig.Type {
		case "fs":
			log.Debug("Server: creating file system service\n")
			proc.Fss = fs.NewFsService(proc.Sc, &sconfig)
			proc.Sc.AddService(comm.Service(proc.Fss), sconfig)
		case "cls":
			log.Debug("Server: creating cluster service\n")
			proc.Cls = cls.NewClusterService(proc.Sc, config, sconfig)
			proc.Sc.AddService(comm.Service(proc.Cls), sconfig)
			oneCls = true
		}
	}

	// if no cluster service, make all nodes online
	if !oneCls {
		for node := range nodes.Iter() {
			node.Status = cluster.Status_Online // TODO: Should not be here!
		}
	}

	// boot services
	proc.Sc.BootServices()

	return proc
}