func prepareRemoteRelPaths(root *drive.File, files []*drive.File) (map[string]string, error) { // The tree only holds integer values so we use // maps to lookup file by index and index by file id indexLookup := map[string]graph.NI{} fileLookup := map[graph.NI]*drive.File{} // All files includes root dir allFiles := append([]*drive.File{root}, files...) // Prepare lookup maps for i, f := range allFiles { indexLookup[f.Id] = graph.NI(i) fileLookup[graph.NI(i)] = f } // This will hold 'parent index' -> 'file index' relationships pathEnds := make([]graph.PathEnd, len(allFiles)) // Prepare parent -> file relationships for i, f := range allFiles { if f == root { pathEnds[i] = graph.PathEnd{From: -1} continue } // Lookup index of parent parentIdx, found := indexLookup[f.Parents[0]] if !found { return nil, fmt.Errorf("Could not find parent of %s (%s)", f.Id, f.Name) } pathEnds[i] = graph.PathEnd{From: parentIdx} } // Create parent pointer tree and calculate path lengths tree := &graph.FromList{Paths: pathEnds} tree.RecalcLeaves() tree.RecalcLen() // This will hold a map of file id => relative path paths := map[string]string{} // Find relative path from root for all files for _, f := range allFiles { if f == root { continue } // Find nodes between root and file nodes := tree.PathTo(indexLookup[f.Id], nil) // This will hold the name of all paths between root and // file (exluding root and including file itself) pathNames := []string{} // Lookup file for each node and grab name for _, n := range nodes { file := fileLookup[n] if file == root { continue } pathNames = append(pathNames, file.Name) } // Join path names to form relative path and add to map paths[f.Id] = filepath.Join(pathNames...) } return paths, nil }
// UltrametricD is the same as Ultrametric but is destructive on the receiver. // // It saves a little memory if you have no further use for the distance matrix. func (dm DistanceMatrix) UltrametricD(cdf int) (graph.FromList, []Ultrametric) { pl := make([]graph.PathEnd, len(dm)) // the parent-list ul := make([]Ultrametric, len(dm)) // labels for the parent-list for i := range pl { // "initial isolated nodes" pl[i] = graph.PathEnd{ From: -1, Len: 1, } ul[i] = Ultrametric{Weight: math.NaN(), Age: 0} } // clusters is the list of clusters available for merging. it starts // with all leaf nodes and is reduced in length as clusters are merged. // values represent distance matrix indexes clusters := make([]int, len(dm)) for i := range dm { clusters[i] = i } // cx converts a distance matrix index to a node number cx := make([]graph.NI, len(dm)) for i := range dm { cx[i] = graph.NI(i) } // extra workspace for DMIN var cl [][]int if cdf == DMIN { cl = make([][]int, len(dm)*2-1) for i := range dm { cl[i] = []int{i} } } for { d1, d2, cl2 := dm.closest(clusters) c1 := cx[d1] // cluster (node) numbers c2 := cx[d2] di1 := dm[d1] // rows in distance matrix di2 := dm[d2] m1 := pl[c1].Len // number of leaves in each cluster m2 := pl[c2].Len m3 := m1 + m2 // total number of leaves for new cluster // create node here, initial values come from d1, d2 parent := graph.NI(len(pl)) age := di2[d1] / 2 pl = append(pl, graph.PathEnd{ From: -1, Len: m3, }) ul = append(ul, Ultrametric{ Weight: math.NaN(), Age: age, }) pl[c1].From = parent pl[c2].From = parent ul[c1].Weight = age - ul[c1].Age ul[c2].Weight = age - ul[c2].Age if len(clusters) == 2 { break } cx[d1] = parent // replace d1 with new computed distance switch cdf { case DAVG: mag1 := float64(m1) mag2 := float64(m2) invMag := 1 / float64(m3) for _, j := range clusters { dij := di1[j] if j != d1 { d := (dij*mag1 + di2[j]*mag2) * invMag di1[j] = d dm[j][d1] = d } } case DMIN: for _, j := range clusters { dj1 := di1[j] if dj2 := di2[j]; dj2 < dj1 { di1[j] = dj2 } else { dm[j][d1] = dj1 } } default: panic("Ultrametric: invalid distance function") } // d1 has been replaced, delete d2 last := len(clusters) - 1 clusters[cl2] = clusters[last] clusters = clusters[:last] } return graph.FromList{Paths: pl}, ul }
// NeighborJoinD is the same as NeighborJoin but is destructive on the receiver. // // It saves a little memory if you have no further use for the distance matrix. func (dm DistanceMatrix) NeighborJoinD() (u graph.LabeledUndirected, wt []float64) { td := make([]float64, len(dm)) // total-distance vector nx := make([]graph.NI, len(dm)) // node number corresponding to dist matrix index for i := range dm { nx[i] = graph.NI(i) } // closest clusters (min value in dm) // return smaller index (j) first closest := func() (jMin, iMin int) { min := math.Inf(1) iMin = -1 jMin = -1 for i := 1; i < len(dm); i++ { for j := 0; j < i; j++ { d := float64(len(dm)-2)*dm[i][j] - td[i] - td[j] if d < min { min = d iMin = i jMin = j } } } return } // wt is edge weight from parent (limb length) var tree graph.LabeledAdjacencyList var nj func(graph.NI) nj = func(m graph.NI) { // m is next internal node number if len(dm) == 2 { wt = make([]float64, 1, m-1) wt[0] = dm[0][1] tree = make(graph.LabeledAdjacencyList, m) n0 := nx[0] n1 := nx[1] tree[n0] = []graph.Half{{To: n1}} tree[n1] = []graph.Half{{To: n0}} return } // compute or recompute TotalDistance for k, dk := range dm { t := 0. for _, d := range dk { t += d } td[k] = t } d1, d2 := closest() Δ := (td[d2] - td[d1]) / float64(len(dm)-2) d21 := dm[d2][d1] ll2 := .5 * (d21 + Δ) ll1 := .5 * (d21 - Δ) n1 := nx[d1] n2 := nx[d2] di1 := dm[d1] // rows in distance matrix di2 := dm[d2] // replace d1 with mean distance for j, dij := range di1 { mn := .5 * (dij + di2[j] - d21) if j == d1 && mn != 0 { panic("uh uh, prolly skip this one...") } di1[j] = mn dm[j][d1] = mn } // d1 has been replaced, delete d2 copy(dm[d2:], dm[d2+1:]) dm = dm[:len(dm)-1] for i, di := range dm { copy(di[d2:], di[d2+1:]) dm[i] = di[:len(di)-1] } nx[d1] = m copy(nx[d2:], nx[d2+1:]) nx = nx[:len(dm)] // recurse nj(m + 1) // join limbs to tree wx1 := graph.LI(len(wt)) wx2 := wx1 + 1 wt = append(wt, ll1, ll2) tree[m] = append(tree[m], graph.Half{n1, wx1}, graph.Half{n2, wx2}) tree[n1] = append(tree[n1], graph.Half{m, wx1}) tree[n2] = append(tree[n2], graph.Half{m, wx2}) return } nj(graph.NI(len(dm))) return graph.LabeledUndirected{tree}, wt }
// AdditiveTree constructs an unrooted tree from an additive distance matrix. // // DistanceMatrix d must be additive. Use provably additive matrices or // use DistanceMatrix.Additive() to verify the additive property. // // Result is an unrooted tree, not necessarily binary, as an undirected graph. // The first len(d) nodes are the leaves represented by the distance matrix. // Internal nodes follow. // // Time complexity is O(n^2) in the number of leaves. func (d DistanceMatrix) AdditiveTree() (u graph.LabeledUndirected, edgeWts []float64) { // interpretation of the presented recursive algorithm. ideas of // things to try: 1: construct result as a parent list rather than // a child tree. 2: drop the recursion. 3. make tree always binary. t := make(graph.LabeledAdjacencyList, len(d), len(d)+len(d)-2) var ap func(int) ap = func(n int) { if n == 1 { edgeWts = []float64{d[0][1]} t[0] = []graph.Half{{1, 0}} t[1] = []graph.Half{{0, 0}} return } nLen, i, k := d.limbWeightSubMatrix(n) x := d[i][n] - nLen ap(n - 1) // f() finds and returns connection node v. // method: df search to find i from k, find connection point on the // way out. // create connection node v if needed, return v if found, -1 if not. var vis graph.Bits var f func(n graph.NI) graph.NI f = func(n graph.NI) graph.NI { if int(n) == i { return n } vis.SetBit(n, 1) for tx, to := range t[n] { if vis.Bit(to.To) == 1 { continue } p := f(to.To) switch { case p < 0: // not found yet continue case x == 0: // p is connection node return p case x < edgeWts[to.Label]: // new node at dist x from to.To // plan is to recycle the existing half edges between // n and to.To to go to new node v. The edge(n, v) // gets to keep the recycled edge label with weight // reduced by x. The edge(to.To, v) gets a new edge label // with weight x. v := graph.NI(len(t)) // new node t[n][tx].To = v // redirect half edgeWts[to.Label] -= x // reduce wt y := graph.LI(len(edgeWts)) // new label for edge(to.To, v) edgeWts = append(edgeWts, x) // now find reciprocal half from to.To back to n for fx, from := range t[to.To] { if from.To == n { // here it is // recycle it to go to v now. t[to.To][fx] = graph.Half{v, y} break } } t = append(t, []graph.Half{{n, to.Label}, {to.To, y}}) x = 0 return v default: // continue back out x -= edgeWts[to.Label] return n } } return -1 } vis.Clear() v := f(graph.NI(k)) y := graph.LI(len(edgeWts)) edgeWts = append(edgeWts, nLen) t[n] = []graph.Half{{v, y}} t[v] = append(t[v], graph.Half{graph.NI(n), y}) } ap(len(d) - 1) return graph.LabeledUndirected{t}, edgeWts }