Exemple #1
0
// readAnnotations reads the GFF features in file and returns one interval
// tree per sequence name, keyed by each feature's SeqName.
//
// Every feature is stored together with an id taken from its position in
// the file, counting from 1. Errors from opening or scanning the file are
// returned to the caller rather than terminating the program.
func readAnnotations(file string) (map[string]*interval.IntTree, error) {
	f, err := os.Open(file)
	if err != nil {
		return nil, err
	}
	// The file is only needed while scanning; release it on every return path.
	defer f.Close()

	trees := make(map[string]*interval.IntTree)
	sc := featio.NewScanner(gff.NewReader(f))
	for id := uintptr(1); sc.Next(); id++ {
		ft := sc.Feat().(*gff.Feature)
		t, ok := trees[ft.SeqName]
		if !ok {
			t = &interval.IntTree{}
			trees[ft.SeqName] = t
		}
		// NOTE(review): the true argument presumably selects a fast insert
		// that leaves range bookkeeping to the AdjustRanges calls below —
		// confirm against the interval package documentation.
		t.Insert(gffInterval{ft, id}, true)
	}
	if err := sc.Error(); err != nil {
		return nil, err
	}
	for _, t := range trees {
		t.AdjustRanges()
	}
	return trees, nil
}
Exemple #2
0
// GFFToRelatable starts a goroutine that reads GFF records from fh and sends
// each one, wrapped in a *Gff, on the returned channel. The channel is closed
// once the reader reports io.EOF or any other error; errors other than io.EOF
// are logged. The returned error is always nil.
func GFFToRelatable(fh io.Reader) (interfaces.RelatableChannel, error) {
	out := make(chan interfaces.Relatable, 16)

	go func() {
		defer close(out)

		reader := gff.NewReader(fh)
		for {
			rec, err := reader.Read()
			if err == io.EOF {
				return
			}
			if err != nil {
				log.Println(err)
				return
			}
			// Read hands back an interface value; assert it to the concrete
			// *gff.Feature so the attributes are reachable.
			out <- &Gff{
				Feature: rec.(*gff.Feature),
				related: make([]interfaces.Relatable, 0, 7),
			}
		}
	}()
	return out, nil
}
Exemple #3
0
// main copies GFF features from stdin to stdout, dropping those whose repeat
// length is below *thresh.
//
// The length is the sum of the fourth and fifth whitespace-separated fields
// of the "Repeat" attribute, so that attribute must hold at least five
// fields. A malformed record, a read error or a write error terminates the
// program.
func main() {
	flag.Parse()

	w := gff.NewWriter(os.Stdout, 60, false)
	sc := featio.NewScanner(gff.NewReader(os.Stdin))
	for sc.Next() {
		f := sc.Feat().(*gff.Feature)
		fields := strings.Fields(f.FeatAttributes.Get("Repeat"))
		// fields[4] is read below, so five fields are required.
		if len(fields) < 5 {
			log.Fatal("invalid repeat attribute")
		}
		end, err := strconv.Atoi(fields[3])
		if err != nil {
			log.Fatalf("failed to parse end coordinate: %v", err)
		}
		remainder, err := strconv.Atoi(fields[4])
		if err != nil {
			log.Fatalf("failed to parse remains coordinate: %v", err)
		}
		if end+remainder < *thresh {
			continue
		}
		if _, err := w.Write(f); err != nil {
			log.Fatalf("failed to write feature: %v", err)
		}
	}
	if err := sc.Error(); err != nil {
		log.Fatalf("error during gff read: %v", err)
	}
}
Exemple #4
0
// main extracts sub-sequences described by GFF "Read" attributes.
//
// The GFF file named by *in is read first; each feature's "Read" attribute
// must hold exactly three fields (sequence name, start, end) and features
// are grouped by sequence name. Every remaining command line argument is
// then read as a FASTA file, and for each sequence with recorded events the
// slice [start:end) is printed to stdout with "//start_end" appended to its
// ID. Any malformed record or I/O error terminates the program.
func main() {
	flag.Parse()
	if *in == "" {
		flag.Usage()
		os.Exit(1)
	}

	f, err := os.Open(*in)
	if err != nil {
		log.Fatalf("failed to open %q: %v", *in, err)
	}
	events := make(map[string][]*gff.Feature)
	fsc := featio.NewScanner(gff.NewReader(f))
	for fsc.Next() {
		ev := fsc.Feat().(*gff.Feature)
		fields := strings.Fields(ev.FeatAttributes.Get("Read"))
		if len(fields) != 3 {
			log.Fatalf("bad record: %+v", ev)
		}
		events[fields[0]] = append(events[fields[0]], ev)
	}
	if err := fsc.Error(); err != nil {
		log.Fatalf("error during gff read: %v", err)
	}
	f.Close()

	for _, ref := range flag.Args() {
		f, err = os.Open(ref)
		if err != nil {
			log.Fatalf("failed to open reference %q: %v", ref, err)
		}
		ssc := seqio.NewScanner(fasta.NewReader(f, linear.NewSeq("", nil, alphabet.DNA)))
		for ssc.Next() {
			seq := ssc.Seq().(*linear.Seq)
			for _, ev := range events[seq.Name()] {
				fields := strings.Fields(ev.FeatAttributes.Get("Read"))
				if len(fields) != 3 {
					log.Fatalf("bad record: %+v", ev)
				}
				start, err := strconv.Atoi(fields[1])
				if err != nil {
					log.Fatalf("failed to get start coordinate: %v", err)
				}
				end, err := strconv.Atoi(fields[2])
				if err != nil {
					log.Fatalf("failed to get end coordinate: %v", err)
				}
				// Validate before slicing: a slice expression panics on bad
				// bounds and would silently read past len up to cap.
				if start < 0 || end < start || len(seq.Seq) < end {
					log.Fatalf("coordinates [%d,%d) out of range for %q of length %d", start, end, seq.Name(), len(seq.Seq))
				}
				// Work on a shallow copy so the scanned sequence is untouched.
				tmp := *seq
				tmp.ID += fmt.Sprintf("//%d_%d", start, end)
				tmp.Seq = tmp.Seq[start:end]
				fmt.Printf("%60a\n", &tmp)
			}
		}
		if err := ssc.Error(); err != nil {
			log.Fatalf("error during fasta read: %v", err)
		}
		f.Close()
	}
}
Exemple #5
0
// filterFeats reads the GFF file annot and returns one interval tree per
// entry of names, in the same order, holding the features that pass the
// score and class filters.
//
// A feature is skipped when it has no score or when exp(score) is below
// thresh. Its class is the feature type, extended with "/" and the second
// field of the attribute named after that type when such an attribute is
// present. A feature is kept when its class is in classes, or when the class
// contains at least two "/" and the class with its last "/"-separated
// component removed is in classes. Features on sequences not listed in names
// are ignored. Open and scan errors are returned.
func filterFeats(annot string, classes, names []string, thresh float64) ([]interval.IntTree, error) {
	ntab := make(map[string]int, len(names))
	for i, n := range names {
		ntab[n] = i
	}

	ts := make([]interval.IntTree, len(names))

	cm := make(map[string]struct{}, len(classes))
	for _, c := range classes {
		cm[c] = struct{}{}
	}

	f, err := os.Open(annot)
	if err != nil {
		return nil, err
	}
	// Release the file on every return path, including scan errors.
	defer f.Close()

	fs := featio.NewScanner(gff.NewReader(f))
	for fs.Next() {
		ft := fs.Feat().(*gff.Feature)
		if ft.FeatScore == nil || math.Exp(*ft.FeatScore) < thresh {
			continue
		}
		class := ft.Feature
		if att := ft.FeatAttributes.Get(ft.Feature); att != "" {
			// This gets the repeat attributes only.
			// NOTE(review): indexing [1] panics if the attribute holds fewer
			// than two fields.
			class = ft.Feature + "/" + strings.Fields(att)[1]
		}
		if _, ok := cm[class]; !ok {
			last := strings.LastIndex(class, "/")
			// Equal indices mean zero or one "/": no parent class to try.
			if last == strings.Index(class, "/") {
				continue
			}
			if _, ok := cm[class[:last]]; !ok {
				continue
			}
		}
		if chr, ok := ntab[ft.SeqName]; ok {
			ts[chr].Insert(intGff{ft}, true)
		}
	}
	if err := fs.Error(); err != nil {
		return nil, err
	}
	for i := range ts {
		ts[i].AdjustRanges()
	}

	return ts, nil
}
Exemple #6
0
// annotFeats reads the GFF file annot and returns one interval tree per
// entry of names, in the same order, holding the repeat features whose class
// is selected by classes.
//
// Features without a "repeat" attribute are skipped. The class is the
// feature type followed by "/" and the second field of the "repeat"
// attribute. A feature is kept when its class is in classes, or when the
// class contains at least two "/" and the class with its last "/"-separated
// component removed is in classes. Each stored feature carries an id equal to
// its zero-based position among all scanned features. Features on sequences
// not listed in names are ignored. Open and scan errors are returned.
func annotFeats(annot string, classes, names []string) ([]interval.IntTree, error) {
	ntab := make(map[string]int, len(names))
	for i, n := range names {
		ntab[n] = i
	}

	ts := make([]interval.IntTree, len(names))

	cm := make(map[string]struct{}, len(classes))
	for _, c := range classes {
		cm[c] = struct{}{}
	}

	f, err := os.Open(annot)
	if err != nil {
		return nil, err
	}
	// Release the file on every return path, including scan errors.
	defer f.Close()

	fs := featio.NewScanner(gff.NewReader(f))
	for id := uintptr(0); fs.Next(); id++ {
		ft := fs.Feat().(*gff.Feature)
		att := ft.FeatAttributes.Get("repeat")
		if att == "" {
			// Ignore non-repeat features.
			continue
		}
		// NOTE(review): indexing [1] panics if the attribute holds fewer
		// than two fields.
		class := ft.Feature + "/" + strings.Fields(att)[1]
		if _, ok := cm[class]; !ok {
			last := strings.LastIndex(class, "/")
			// Equal indices mean exactly one "/": no parent class to try.
			if last == strings.Index(class, "/") {
				continue
			}
			if _, ok := cm[class[:last]]; !ok {
				continue
			}
		}
		if chr, ok := ntab[ft.SeqName]; ok {
			ts[chr].Insert(intGff{ft, id}, true)
		}
	}
	if err := fs.Error(); err != nil {
		return nil, err
	}
	for i := range ts {
		ts[i].AdjustRanges()
	}

	return ts, nil
}
Exemple #7
0
// main filters GFF features read from stdin against a list of names.
//
// The file named by *exclude is read line by line into a set. A feature
// whose "Read" attribute equals one of those lines is left out of stdout
// and, when *retain is set, written to stderr instead. All other features
// are written to stdout. Any I/O error terminates the program.
func main() {
	flag.Parse()
	if *exclude == "" {
		flag.Usage()
		os.Exit(1)
	}

	nameSet := make(map[string]struct{})
	f, err := os.Open(*exclude)
	if err != nil {
		log.Fatalf("failed to open exclude file %q: %v", *exclude, err)
	}
	ls := bufio.NewScanner(f)
	for ls.Scan() {
		nameSet[ls.Text()] = struct{}{}
	}
	err = ls.Err()
	// The list is fully consumed at this point; do not hold the descriptor
	// for the rest of the run.
	f.Close()
	if err != nil {
		log.Fatalf("failed to read exclude file: %v", err)
	}

	w := gff.NewWriter(os.Stdout, 60, true)
	var excl *gff.Writer
	if *retain {
		excl = gff.NewWriter(os.Stderr, 60, true)
	}
	sc := featio.NewScanner(gff.NewReader(os.Stdin))
	for sc.Next() {
		ft := sc.Feat().(*gff.Feature)
		n := ft.FeatAttributes.Get("Read")
		if _, ok := nameSet[n]; ok {
			if excl != nil {
				_, err := excl.Write(ft)
				if err != nil {
					log.Fatalf("failed to write feature: %v", err)
				}
			}
			continue
		}
		_, err := w.Write(ft)
		if err != nil {
			log.Fatalf("failed to write feature: %v", err)
		}
	}
	if err := sc.Error(); err != nil {
		log.Fatalf("error during gff read: %v", err)
	}
}
Exemple #8
0
// main summarises the repeat types found in each group of a GFF stream read
// from stdin.
//
// For every feature the first field of the "Repeat" attribute is taken as
// the type and the "Group" attribute as an integer group id. Without
// *doGrouping each feature is printed as "group<TAB>type" and nothing else
// is output. With *doGrouping one line per non-nil group is printed holding
// the group id, the "type:count" pairs produced by sortedMap, the name
// chosen by nameHeuristic and that name passed through trunc(name, 5).
// A malformed record or read error terminates the program.
func main() {
	flag.Parse()

	var grps []map[string]int
	sc := featio.NewScanner(gff.NewReader(os.Stdin))
	for sc.Next() {
		f := sc.Feat().(*gff.Feature)
		g := f.FeatAttributes.Get("Group")
		repeat := strings.Fields(f.FeatAttributes.Get("Repeat"))
		// A missing or blank Repeat attribute has no type to report.
		if len(repeat) == 0 {
			log.Fatal("invalid repeat attribute")
		}
		typ := repeat[0]
		if !*doGrouping {
			fmt.Printf("%s\t%s\n", g, typ)
		}
		// NOTE(review): the group id is parsed and recorded even when
		// grouping is off, so a non-integer Group is fatal in both modes —
		// confirm this is intended.
		gid, err := strconv.Atoi(g)
		if err != nil {
			log.Fatalf("failed to parse group id: %v", err)
		}
		grps = add(grps, gid, typ)
	}
	if err := sc.Error(); err != nil {
		log.Fatalf("error during gff read: %v", err)
	}

	if !*doGrouping {
		return
	}
	for gid, g := range grps {
		if g == nil {
			continue
		}
		fmt.Printf("%d\t", gid)
		m := sortedMap(g)
		for i, t := range m {
			if i != 0 {
				fmt.Print(" ")
			}
			fmt.Printf("%s:%d", t.typ, t.n)
		}
		name := nameHeuristic(m)
		fmt.Printf("\t%s\t%s\n", name, trunc(name, 5))
	}
}
Exemple #9
0
// TestReadFromFunc checks that a scanner built with NewScannerFromFunc around
// a GFF reader's Read method yields, in order, exactly the features listed
// for each test case and finishes without error.
func (s *S) TestReadFromFunc(c *check.C) {
	for i, g := range []struct {
		gff  string
		feat []*gff.Feature
	}{
		{
			gff: `SEQ1	EMBL	atg	103	105	.	+	0
SEQ1	EMBL	exon	103	172	.	+	0
SEQ1	EMBL	splice5	172	173	.	+	.
SEQ1	netgene	splice5	172	173	0.94	+	.
SEQ1	genie	sp5-20	163	182	2.3	+	.
SEQ1	genie	sp5-10	168	177	2.1	+	.
SEQ2	grail	ATG	17	19	2.1	-	0
`,
			feat: []*gff.Feature{
				{SeqName: "SEQ1", Source: "EMBL", Feature: "atg", FeatStart: 102, FeatEnd: 105, FeatScore: nil, FeatFrame: gff.Frame0, FeatStrand: seq.Plus},
				{SeqName: "SEQ1", Source: "EMBL", Feature: "exon", FeatStart: 102, FeatEnd: 172, FeatScore: nil, FeatFrame: gff.Frame0, FeatStrand: seq.Plus},
				{SeqName: "SEQ1", Source: "EMBL", Feature: "splice5", FeatStart: 171, FeatEnd: 173, FeatScore: nil, FeatFrame: gff.NoFrame, FeatStrand: seq.Plus},
				{SeqName: "SEQ1", Source: "netgene", Feature: "splice5", FeatStart: 171, FeatEnd: 173, FeatScore: floatPtr(0.94), FeatFrame: gff.NoFrame, FeatStrand: seq.Plus},
				{SeqName: "SEQ1", Source: "genie", Feature: "sp5-20", FeatStart: 162, FeatEnd: 182, FeatScore: floatPtr(2.3), FeatFrame: gff.NoFrame, FeatStrand: seq.Plus},
				{SeqName: "SEQ1", Source: "genie", Feature: "sp5-10", FeatStart: 167, FeatEnd: 177, FeatScore: floatPtr(2.1), FeatFrame: gff.NoFrame, FeatStrand: seq.Plus},
				{SeqName: "SEQ2", Source: "grail", Feature: "ATG", FeatStart: 16, FeatEnd: 19, FeatScore: floatPtr(2.1), FeatFrame: gff.Frame0, FeatStrand: seq.Minus},
			},
		},
	} {
		sc := featio.NewScannerFromFunc(
			gff.NewReader(
				bytes.NewBufferString(g.gff),
			).Read,
		)

		var j int
		for sc.Next() {
			f := sc.Feat()
			// Guard the index so that a reader yielding surplus features is
			// reported as a failed check rather than an out-of-range panic.
			if !c.Check(j < len(g.feat), check.Equals, true, check.Commentf("Test: %d Line: %d: unexpected extra feature", i, j+1)) {
				break
			}
			c.Check(f, check.DeepEquals, g.feat[j], check.Commentf("Test: %d Line: %d", i, j+1))
			j++
		}
		c.Check(sc.Error(), check.Equals, nil)
		c.Check(j, check.Equals, len(g.feat))
	}
}
Exemple #10
0
// readMappings reads the GFF features in file and returns them keyed by the
// first whitespace-separated field of each feature's "Read" attribute.
// Features without a usable "Read" attribute are skipped, and a later
// feature with the same key replaces an earlier one. Errors from opening or
// scanning the file are returned.
func readMappings(file string) (map[string]*gff.Feature, error) {
	f, err := os.Open(file)
	if err != nil {
		return nil, err
	}
	// Release the file on every return path.
	defer f.Close()

	mapping := make(map[string]*gff.Feature)
	sc := featio.NewScanner(gff.NewReader(f))
	for sc.Next() {
		ft := sc.Feat().(*gff.Feature)
		// Fields yields nothing for an absent or blank attribute, so this
		// also protects the [0] index below.
		fields := strings.Fields(ft.FeatAttributes.Get("Read"))
		if len(fields) == 0 {
			continue
		}
		// Currently reefer only expects a single hit per read.
		mapping[fields[0]] = ft
	}
	// The scanner's own error must be consulted here; the err from os.Open
	// above is always nil by this point.
	if err := sc.Error(); err != nil {
		return nil, err
	}
	return mapping, nil
}
Exemple #11
0
// main reads GFF features from stdin and prints, for every distinct "Group"
// attribute value, one line "chrom<TAB>start<TAB>end<TAB>group" where start
// and end are the smallest FeatStart and largest FeatEnd seen in the group
// and chrom is the SeqName of the group's first feature. Features without a
// "Group" attribute are ignored. Lines are emitted in map iteration order.
func main() {
	// span is the bounding interval accumulated for one group.
	type span struct {
		chrom      string
		start, end int
	}
	bounds := make(map[string]span)

	sc := featio.NewScanner(gff.NewReader(os.Stdin))
	for sc.Next() {
		ft := sc.Feat().(*gff.Feature)
		name := ft.FeatAttributes.Get("Group")
		if name == "" {
			continue
		}
		cur, seen := bounds[name]
		switch {
		case !seen:
			// First member of the group fixes the chromosome.
			cur = span{chrom: ft.SeqName, start: ft.FeatStart, end: ft.FeatEnd}
		default:
			if ft.FeatStart < cur.start {
				cur.start = ft.FeatStart
			}
			if cur.end < ft.FeatEnd {
				cur.end = ft.FeatEnd
			}
		}
		bounds[name] = cur
	}
	if err := sc.Error(); err != nil {
		log.Fatalf("error during gff read: %v", err)
	}

	for name, b := range bounds {
		fmt.Printf("%s\t%d\t%d\t%s\n", b.chrom, b.start, b.end, name)
	}
}
Exemple #12
0
// main reads GFF features from stdin and, for every feature of type "repeat"
// carrying a "repeat" attribute, calls inc on a per-repeat vector for each
// position from the attribute's third field to its fourth field inclusive.
// Vectors are keyed by the attribute's first field, and the second field is
// remembered for output. Every non-zero vector entry is then printed as
// "second<TAB>first<TAB>position<TAB>value".
//
// NOTE(review): vector.inc presumably increments a per-position counter and
// mustAtoi presumably aborts on a non-integer — both are defined elsewhere;
// confirm.
func main() {
	vm := make(map[string]*vector)
	fm := make(map[string]string)

	sc := featio.NewScanner(gff.NewReader(os.Stdin))
	for sc.Next() {
		f := sc.Feat().(*gff.Feature)
		if f.Feature != "repeat" {
			continue
		}
		att := f.FeatAttributes.Get("repeat")
		if att == "" {
			continue
		}
		fields := strings.Fields(att)
		// fields[0] through fields[3] are read below.
		if len(fields) < 4 {
			fmt.Fprintf(os.Stderr, "invalid repeat attribute: %q\n", att)
			os.Exit(1)
		}

		s := mustAtoi(fields[2])
		e := mustAtoi(fields[3])
		v, ok := vm[fields[0]]
		if !ok {
			v = &vector{}
			vm[fields[0]] = v
			fm[fields[0]] = fields[1]
		}
		for i := e; i >= s; i-- {
			v.inc(i)
		}
	}
	// A scan that stops on an error would otherwise yield silently
	// truncated counts.
	if err := sc.Error(); err != nil {
		fmt.Fprintf(os.Stderr, "error during gff read: %v\n", err)
		os.Exit(1)
	}

	for typ, vec := range vm {
		for pos, val := range *vec {
			if val != 0 {
				fmt.Printf("%s\t%s\t%d\t%d\n", fm[typ], typ, pos, val)
			}
		}
	}
}
Exemple #13
0
// main reads GFF features from stdin, collects the distinct non-empty values
// of their "Read" attribute and prints them to stdout one per line in sorted
// order. A read error terminates the program.
func main() {
	seen := make(map[string]struct{})
	sc := featio.NewScanner(gff.NewReader(os.Stdin))
	for sc.Next() {
		read := sc.Feat().(*gff.Feature).FeatAttributes.Get("Read")
		if read != "" {
			seen[read] = struct{}{}
		}
	}
	if err := sc.Error(); err != nil {
		log.Fatalf("error during gff read: %v", err)
	}

	// Map iteration order is unspecified, so sort before printing.
	sorted := make([]string, 0, len(seen))
	for read := range seen {
		sorted = append(sorted, read)
	}
	sort.Strings(sorted)
	for i := range sorted {
		fmt.Println(sorted[i])
	}
}
Exemple #14
0
// main extracts read segments named by a GFF stream from SAM files.
//
// GFF features are read from stdin; each non-empty "Read" attribute must
// hold a read name followed by integer start and end coordinates. Every
// command line argument is then read as a SAM file. For the first record
// found for each named read, the segment is printed to stdout as a sequence
// whose name is the read name plus "//start_end". For reverse-flagged
// records "(-)" is appended to the name, the coordinates are mirrored about
// the read length and a description is set. Malformed input or an I/O error
// terminates the program.
func main() {
	if len(os.Args) < 2 {
		fmt.Fprintln(os.Stderr, "invalid invocation: must have at least one reads file")
		os.Exit(1)
	}

	extract := make(map[string][2]int)
	sc := featio.NewScanner(gff.NewReader(os.Stdin))
	for sc.Next() {
		f := sc.Feat().(*gff.Feature)
		read := f.FeatAttributes.Get("Read")
		if read == "" {
			continue
		}
		fields := strings.Fields(read)
		// Name, start and end are all indexed below.
		if len(fields) < 3 {
			log.Fatalf("failed to parse %q: expected name, start and end", read)
		}
		name := fields[0]
		start, err := strconv.Atoi(fields[1])
		if err != nil {
			log.Fatalf("failed to parse %q: %v", read, err)
		}
		end, err := strconv.Atoi(fields[2])
		if err != nil {
			log.Fatalf("failed to parse %q: %v", read, err)
		}
		extract[name] = [2]int{start, end}
	}
	if err := sc.Error(); err != nil {
		log.Fatalf("error during GFF read: %v", err)
	}

	for _, reads := range os.Args[1:] {
		sf, err := os.Open(reads)
		if err != nil {
			log.Fatalf("failed to open %q: %v", reads, err)
		}
		sr, err := sam.NewReader(sf)
		if err != nil {
			log.Fatalf("failed to open SAM input %q: %v", reads, err)
		}
		for {
			r, err := sr.Read()
			if err != nil {
				if err != io.EOF {
					log.Fatalf("unexpected error reading SAM: %v", err)
				}
				break
			}

			v, ok := extract[r.Name]
			if !ok {
				continue
			}
			// Currently reefer only expects a single hit per read,
			// so any multiples are due to duplicate read file input.
			// Update this behaviour if we change reefer to look at
			// remapping soft-clipped segments.
			delete(extract, r.Name)

			reverse := r.Flags&sam.Reverse != 0
			// The name suffix records the coordinates as given in the GFF,
			// before any mirroring.
			rng := fmt.Sprintf("//%d_%d", v[0], v[1])
			if reverse {
				rng += "(-)"
				readLen := r.Seq.Length
				v[0], v[1] = readLen-v[1], readLen-v[0]
			}
			// NOTE(review): OneToZero presumably converts a one-based start
			// to zero-based for slicing — confirm.
			v[0] = feat.OneToZero(v[0])
			s := linear.NewSeq(
				r.Name+rng,
				alphabet.BytesToLetters(r.Seq.Expand())[v[0]:v[1]],
				alphabet.DNA,
			)
			if reverse {
				s.Desc = "(sequence revcomp relative to read)"
			}
			fmt.Printf("%60a\n", s)
		}
		sf.Close()
	}
}
Exemple #15
0
// main filters the GFF features in *in and writes the survivors to stdout.
//
// It loads reference annotation trees from *ref, read-to-contig mappings
// from *mapfile and contig lengths from *contigs. A feature is dropped
// (with a log line) when within reports it as too close to the read end,
// when it lies within *buf of either contig end, or when any reference
// annotation overlapping the mapped contig region has the same repeat class
// (second field of the "Repeat" attribute). Features without a "Repeat"
// attribute are dropped silently. Unexpected names, malformed attributes and
// I/O errors terminate the program.
func main() {
	flag.Parse()
	if *in == "" || *ref == "" || *mapfile == "" || *contigs == "" {
		flag.Usage()
		// Missing required flags is a usage error, so exit non-zero.
		os.Exit(1)
	}

	refTrees, err := readAnnotations(*ref)
	if err != nil {
		log.Fatalf("failed to read annotation trees: %v", err)
	}
	mapping, err := readMappings(*mapfile)
	if err != nil {
		log.Fatalf("failed to read mapping file: %v", err)
	}
	contigLength, err := readContigs(*contigs)
	if err != nil {
		log.Fatalf("failed to read contig file: %v", err)
	}

	f, err := os.Open(*in)
	if err != nil {
		log.Fatalf("failed to open %q: %v", *in, err)
	}
	defer f.Close()

	w := gff.NewWriter(os.Stdout, 60, true)

	sc := featio.NewScanner(gff.NewReader(f))
	for sc.Next() {
		ft := sc.Feat().(*gff.Feature)
		ok, err := within(*buf, ft.SeqName)
		if err != nil {
			log.Fatalf("failed to parse sequence name: %s: %v", ft.SeqName, err)
		}
		if !ok {
			log.Printf("too close to read end: excluding %+v", ft)
			continue
		}

		repeat := ft.FeatAttributes.Get("Repeat")
		if repeat == "" {
			continue
		}
		fields := strings.Fields(repeat)

		// The part of the sequence name before "//" keys the mapping.
		name := strings.Split(ft.SeqName, "//")
		if len(name) != 2 {
			log.Fatalf("unexpected sequence name in input: %q", ft.SeqName)
		}
		contigSide, ok := mapping[name[0]]
		if !ok {
			log.Fatalf("unexpected sequence name in input: %q", ft.SeqName)
		}
		if contigSide.FeatStart+ft.FeatStart < *buf {
			// Report the value that was actually compared against *buf.
			log.Printf("too close to contig start:\n\texcluding %#v\n\tcontig %#v\n\n%d < %d", ft, contigSide, contigSide.FeatStart+ft.FeatStart, *buf)
			continue
		}
		length, ok := contigLength[contigSide.SeqName]
		if !ok {
			log.Fatalf("unexpected sequence name in contig mapping: %q", contigSide.SeqName)
		}
		if length-((contigSide.FeatEnd-contigSide.FeatStart)+ft.FeatEnd) < *buf {
			log.Printf("too close to contig end:\n\texcluding %#v\n\tcontig %#v", ft, contigSide)
			continue
		}
		t, ok := refTrees[contigSide.SeqName]
		if !ok {
			log.Fatalf("no tree for %v mapped by %v", contigSide.SeqName, ft.SeqName)
		}

		// Count reference annotations of the same repeat class that overlap
		// the mapped contig region.
		var n int
		hits := t.Get(gffInterval{Feature: contigSide})
		for _, h := range hits {
			hit := h.(gffInterval)
			hitRepeat := hit.FeatAttributes.Get("Repeat")
			if hitRepeat == "" {
				continue
			}
			hitFields := strings.Fields(hitRepeat)
			// Both class fields are indexed at [1] below.
			if len(hitFields) < 2 || len(fields) < 2 {
				log.Fatalf("invalid repeat attribute: %q or %q", hitRepeat, repeat)
			}
			if fields[1] == hitFields[1] {
				n++
			}
		}
		if n != 0 {
			log.Printf("too many hits: excluding %+v", ft)
			for _, h := range hits {
				log.Printf("\t%+v", h.(gffInterval).Feature)
			}
			continue
		}
		if _, err := w.Write(ft); err != nil {
			log.Fatalf("failed to write feature: %v", err)
		}
	}
	err = sc.Error()
	if err != nil {
		log.Fatalf("error during GFF read: %v", err)
	}
}
Exemple #16
0
// main classifies read names found in the GFF file *in by how many distinct
// coordinate suffixes they appear with.
//
// The first field of each non-empty "Read" attribute is split at its last
// "/" into a name and a suffix. Names seen with exactly one suffix are
// written, one per line, to "<base>.unique.text"; names seen with several
// are written to "<base>.non-unique.text" together with their sorted
// suffixes, where <base> is the base name of *in. Malformed read names and
// I/O errors terminate the program.
func main() {
	flag.Parse()
	if *in == "" {
		flag.Usage()
		os.Exit(1)
	}

	f, err := os.Open(*in)
	if err != nil {
		log.Fatalf("failed to open %q: %v", *in, err)
	}

	names := make(map[string]map[string]struct{})

	sc := featio.NewScanner(gff.NewReader(f))
	for sc.Next() {
		ft := sc.Feat().(*gff.Feature)
		read := ft.FeatAttributes.Get("Read")
		if read == "" {
			continue
		}
		read = strings.Fields(read)[0]
		idx := strings.LastIndex(read, "/")
		// Without a "/" there is no suffix to split off, and slicing with
		// idx == -1 would panic.
		if idx < 0 {
			log.Fatalf("bad read name: %q", read)
		}
		e, ok := names[read[:idx]]
		if !ok {
			e = make(map[string]struct{})
			names[read[:idx]] = e
		}
		e[read[idx+1:]] = struct{}{}
	}
	if err := sc.Error(); err != nil {
		log.Fatalf("error during gff read: %v", err)
	}
	// Closed exactly once, as soon as the input has been consumed.
	f.Close()

	base := filepath.Base(*in)
	unique, err := os.Create(base + ".unique.text")
	if err != nil {
		log.Fatalf("failed to create %q: %v", base+".unique.text", err)
	}
	defer unique.Close()
	nonUnique, err := os.Create(base + ".non-unique.text")
	if err != nil {
		log.Fatalf("failed to create %q: %v", base+".non-unique.text", err)
	}
	defer nonUnique.Close()
	for name, coords := range names {
		switch len(coords) {
		case 0:
		case 1:
			if _, err := fmt.Fprintln(unique, name); err != nil {
				log.Fatalf("failed to write %q: %v", base+".unique.text", err)
			}
		default:
			s := make([]string, 0, len(coords))
			for c := range coords {
				s = append(s, c)
			}
			sort.Strings(s)
			if _, err := fmt.Fprintf(nonUnique, "%s\t%v\n", name, s); err != nil {
				log.Fatalf("failed to write %q: %v", base+".non-unique.text", err)
			}
		}
	}
}
Exemple #17
0
// main aligns the sequence flanking each insertion event against the
// opposite flank and annotates the event with the result.
//
// Events are GFF features read from *in whose "Read" attribute holds a
// sequence name, start and end. Each command line argument is read as a
// FASTA file; for every event on a sequence the left and right flanks are
// taken either from the "Dup" attribute length or from half of *window
// around each end, aligned with the table built from alnmat, and rejected if
// either flank or either aligned segment has fewer than *thresh non-gap
// letters. Accepted events gain a "TSD" attribute and are written as GFF to
// stdout. When *fastaOut is set the inserted region [start,end) of every
// event is also written there as FASTA. Malformed input and I/O errors
// terminate the program.
func main() {
	flag.Var(&alnmat, "align", "specify the match, mismatch and gap parameters")
	flag.Parse()
	if *in == "" {
		flag.Usage()
		os.Exit(1)
	}

	f, err := os.Open(*in)
	if err != nil {
		log.Fatalf("failed to open %q: %v", *in, err)
	}
	events := make(map[string][]*gff.Feature)
	fsc := featio.NewScanner(gff.NewReader(f))
	for fsc.Next() {
		f := fsc.Feat().(*gff.Feature)
		fields := strings.Fields(f.FeatAttributes.Get("Read"))
		if len(fields) != 3 {
			log.Fatalf("bad record: %+v", f)
		}
		events[fields[0]] = append(events[fields[0]], f)
	}
	if err := fsc.Error(); err != nil {
		log.Fatalf("error during gff read: %v", err)
	}
	f.Close()

	w := gff.NewWriter(os.Stdout, 60, true)
	w.WriteComment("Right coordinates (field 5) and strand (field 7) are hypothetical.")

	var out *os.File
	if *fastaOut != "" {
		out, err = os.Create(*fastaOut)
		if err != nil {
			log.Fatalf("failed to create fasta insertion output file %q: %v", *fastaOut, err)
		}
		defer out.Close()
	}

	hw := *window / 2
	sw := makeTable(alphabet.DNAgapped, alnmat)
	for _, ref := range flag.Args() {
		f, err = os.Open(ref)
		if err != nil {
			log.Fatalf("failed to open reference %q: %v", ref, err)
		}
		ssc := seqio.NewScanner(fasta.NewReader(f, linear.NewSeq("", nil, alphabet.DNAgapped)))
		for ssc.Next() {
			seq := ssc.Seq().(*linear.Seq)
			// The label sits on the per-event loop so that rejecting one
			// event moves on to the next event of the same sequence instead
			// of discarding all of that sequence's remaining events.
		nextEvent:
			for _, f := range events[seq.Name()] {
				fields := strings.Fields(f.FeatAttributes.Get("Read"))
				if len(fields) != 3 {
					log.Fatalf("bad record: %+v", f)
				}
				start, err := strconv.Atoi(fields[1])
				if err != nil {
					log.Fatalf("failed to get start coordinate: %v", err)
				}
				end, err := strconv.Atoi(fields[2])
				if err != nil {
					log.Fatalf("failed to get end coordinate: %v", err)
				}

				if out != nil {
					// Validate before slicing: a slice expression panics on
					// bad bounds and would read past len up to cap.
					if start < 0 || end < start || len(seq.Seq) < end {
						log.Fatalf("coordinates [%d,%d) out of range for %q of length %d", start, end, seq.Name(), len(seq.Seq))
					}
					insert := *seq
					if insert.Desc != "" {
						insert.Desc += " "
					}
					insert.Desc += fmt.Sprintf("[%d,%d)", start, end)
					insert.Seq = insert.Seq[start:end]
					fmt.Fprintf(out, "%60a\n", &insert)
				}

				var lOff, lEnd, rOff, rEnd int
				// If we have refined ends, use them.
				if dup := f.FeatAttributes.Get("Dup"); dup != "" {
					d, err := strconv.Atoi(dup)
					if err != nil {
						log.Fatalf("failed to get duplication length: %v", err)
					}
					lOff = max(0, start-d)
					lEnd = start
					rOff = end
					rEnd = min(len(seq.Seq), end+d)
				} else {
					lOff = max(0, start-hw)
					lEnd = min(len(seq.Seq), start+hw)
					rOff = max(0, end-hw)
					rEnd = min(len(seq.Seq), end+hw)

					// Ensure windows don't overlap.
					if lEnd > rOff {
						lEnd = (lEnd + rOff) / 2
						rOff = lEnd
					}
				}

				if lEnd-lOff < *thresh || rEnd-rOff < *thresh {
					// Don't do fruitless work.
					continue nextEvent
				}

				// Shallow copies so the flanks can be re-sliced and renamed
				// without touching the scanned sequence.
				left := *seq
				left.ID = "prefix"
				left.Seq = left.Seq[lOff:lEnd]
				right := *seq
				right.ID = "postfix"
				right.Seq = right.Seq[rOff:rEnd]

				aln, err := sw.Align(&right, &left)
				if err != nil {
					log.Fatal(err)
				}

				fa := align.Format(&right, &left, aln, '-')
				for _, seg := range fa {
					// Count the non-gap letters of each aligned segment.
					var n int
					for _, l := range seg.(alphabet.Letters) {
						if l != '-' {
							n++
						}
					}
					if n < *thresh {
						continue nextEvent
					}
				}

				var sc int
				for _, seg := range aln {
					type scorer interface {
						Score() int
					}
					sc += seg.(scorer).Score()
				}
				f.FeatAttributes = append(f.FeatAttributes, gff.Attribute{
					Tag: "TSD", Value: fmt.Sprintf(`%v %d %d %v "%v" %d`,
						fa[0], aln[len(aln)-1].Features()[0].End()+lOff,
						aln[0].Features()[1].Start()+rOff, fa[1],
						aln, sc),
				})
				if _, err := w.Write(f); err != nil {
					log.Fatalf("failed to write feature: %v", err)
				}
			}
		}
		if err := ssc.Error(); err != nil {
			log.Fatalf("error during fasta read: %v", err)
		}
		f.Close()
	}
}