func processServer(index interval.Tree, queue, output chan *feat.Feature, wg *sync.WaitGroup) { defer wg.Done() var ( buffer []byte = make([]byte, 0, annotationLength) annotations Matches = make(Matches, 0, maxAnnotations+1) o *Overlap = &Overlap{&annotations} prefix string = ` ; Annot "` blank string = prefix + strings.Repeat("-", mapLength) overlap int ) for feature := range queue { annotations = annotations[:0] heap.Init(&Overlap{&annotations}) buffer = buffer[:0] buffer = append(buffer, []byte(blank)...) if query, err := interval.New(string(feature.Location), feature.Start, feature.End, 0, nil); err != nil { fmt.Fprintf(os.Stderr, "Feature has end < start: %v\n", feature) continue } else { overlap = int(float64(feature.Len()) * minOverlap) if results := index.Intersect(query, overlap); results != nil { for hit := range results { o.Push(Match{ Interval: hit, Overlap: util.Min(hit.End(), query.End()) - util.Max(hit.Start(), query.Start()), Strand: feature.Strand, }) if len(annotations) > maxAnnotations { o.Pop() } } } } if len(annotations) > 0 { sort.Sort(&Start{&annotations}) buffer = makeAnnotation(feature, annotations, len(prefix), buffer) } buffer = append(buffer, '"') feature.Attributes += string(buffer) output <- feature } }
func main() { var ( region *bed.Reader motif *bed.Reader err error ) motifName := flag.String("motif", "", "Filename for motif file.") regionName := flag.String("region", "", "Filename for region file.") verbose := flag.Bool("verbose", false, "Print details of identified motifs to stderr.") headerLine := flag.Bool("header", false, "Print a header line.") help := flag.Bool("help", false, "Print this usage message.") flag.Usage = func() { fmt.Fprintf(os.Stderr, "Usage: %s -motif <motif file> -region <region file>\n", os.Args[0]) flag.PrintDefaults() } flag.Parse() if *help || *regionName == "" || *motifName == "" { flag.Usage() os.Exit(1) } // Open files if motif, err = bed.NewReaderName(*motifName, 3); err != nil { fmt.Fprintf(os.Stderr, "Error: %v.", err) os.Exit(0) } else { fmt.Fprintf(os.Stderr, "Reading motif features from `%s'.\n", *motifName) } defer motif.Close() if region, err = bed.NewReaderName(*regionName, 3); err != nil { fmt.Fprintf(os.Stderr, "Error: %v.", err) os.Exit(0) } else { fmt.Fprintf(os.Stderr, "Reading region features from `%s'.\n", *regionName) } defer region.Close() // Read in motif features and build interval tree to search intervalTree := interval.NewTree() for line := 1; ; line++ { if motifLine, err := motif.Read(); err != nil { break } else { if motifInterval, err := interval.New(string(motifLine.Location), motifLine.Start, motifLine.End, 0, nil); err == nil { intervalTree.Insert(motifInterval) } else { fmt.Fprintf(os.Stderr, "Line: %d: Feature has end < start: %v\n", line, motifLine) } } } // Read in region features and search for motifs within region // Calculate median motif location, sample standard deviation of locations // and mean distance of motif from midpoint of region for motifs contained // within region. Report these and n of motifs within region. if *headerLine { fmt.Println("Chromosome\tStart\tEnd\tn-hits\tMeanHitPos\tStddevHitPos\tMeanMidDistance") } for line := 1; ; line++ { if regionLine, err := region.Read(); err != nil { break } else { regionMidPoint := float64(regionLine.Start+regionLine.End) / 2 if regionInterval, err := interval.New(string(regionLine.Location), regionLine.Start, regionLine.End, 0, regionMidPoint); err == nil { if *verbose { fmt.Fprintf(os.Stderr, "%s\t%d\t%d\n", regionLine.Location, regionLine.Start, regionLine.End) } sumOfDiffs, sumOfSquares, mean, oldmean, n := 0., 0., 0., 0., 0. for intersector := range intervalTree.Within(regionInterval, 0) { motifMidPoint := float64(intersector.Start()+intersector.End()) / 2 if *verbose { fmt.Fprintf(os.Stderr, "\t%s\t%d\t%d\n", intersector.Chromosome(), intersector.Start(), intersector.End()) } // The Method of Provisional Means n++ mean = oldmean + (motifMidPoint-oldmean)/n sumOfSquares += (motifMidPoint - oldmean) * (motifMidPoint - mean) oldmean = mean sumOfDiffs += math.Abs(motifMidPoint - regionMidPoint) } fmt.Printf("%s\t%d\t%d\t%0.f\t%0.f\t%f\t%f\n", regionLine.Location, regionLine.Start, regionLine.End, n, mean, math.Sqrt(sumOfSquares/(n-1)), sumOfDiffs/n) } else { fmt.Fprintf(os.Stderr, "Line: %d: Feature has end < start: %v\n", line, regionLine) } } } }
func main() { var ( target *gff.Reader source *gff.Reader out *gff.Writer indexFile *os.File e error store bool ) targetName := flag.String("target", "", "Filename for input to be annotated. Defaults to stdin.") sourceName := flag.String("source", "", "Filename for source annotation.") indexName := flag.String("index", "", "Filename for index cache.") outName := flag.String("out", "", "Filename for output. Defaults to stdout.") flag.Float64Var(&minOverlap, "overlap", 0.05, "Overlap between features.") threads := flag.Int("threads", 2, "Number of threads to use.") bufferLen := flag.Int("buffer", 1000, "Length of ouput buffer.") help := flag.Bool("help", false, "Print this usage message.") flag.Parse() runtime.GOMAXPROCS(*threads) fmt.Fprintf(os.Stderr, "Using %d threads.\n", runtime.GOMAXPROCS(0)) if *help || *sourceName == "" { flag.Usage() os.Exit(1) } if *targetName == "" { fmt.Fprintln(os.Stderr, "Reading PALS features from stdin.") target = gff.NewReader(os.Stdin) } else if target, e = gff.NewReaderName(*targetName); e != nil { fmt.Fprintf(os.Stderr, "Error: %v.", e) os.Exit(0) } else { fmt.Fprintf(os.Stderr, "Reading target features from `%s'.\n", *targetName) } defer target.Close() switch { case *indexName == "" && *sourceName == "": fmt.Fprintln(os.Stderr, "No source or index provided.") os.Exit(0) case *indexName != "" && *sourceName == "": if indexFile, e = os.Open(*indexName); e != nil { fmt.Fprintf(os.Stderr, "Error: %v.\n", e) os.Exit(0) } defer indexFile.Close() store = false case *indexName != "" && *sourceName != "": if indexFile, e = os.Create(*indexName); e != nil { fmt.Fprintf(os.Stderr, "Error: %v.\n", e) os.Exit(0) } defer indexFile.Close() store = true fallthrough case *indexName == "" && *sourceName != "": if source, e = gff.NewReaderName(*sourceName); e != nil { fmt.Fprintf(os.Stderr, "Error: %v.\n", e) os.Exit(0) } fmt.Fprintf(os.Stderr, "Reading annotation features from `%s'.\n", *sourceName) defer source.Close() } if *outName == "" { fmt.Fprintln(os.Stderr, "Writing annotation to stdout.") out = gff.NewWriter(os.Stdout, 2, 60, false) } else if out, e = gff.NewWriterName(*outName, 2, 60, true); e != nil { fmt.Fprintf(os.Stderr, "Error: %v.", e) } else { fmt.Fprintf(os.Stderr, "Writing annotation to `%s'.\n", *outName) } defer out.Close() intervalTree := interval.NewTree() for count := 0; ; count++ { if repeat, err := source.Read(); err != nil { break } else { fmt.Fprintf(os.Stderr, "Line: %d\r", count) repData := &RepeatRecord{} repData.Parse(repeat.Attributes) if repInterval, err := interval.New(string(repeat.Location), repeat.Start, repeat.End, 0, *repData); err == nil { intervalTree.Insert(repInterval) } else { fmt.Fprintf(os.Stderr, "Feature has end < start: %v\n", repeat) } } } process := make(chan *feat.Feature) buffer := make(chan *feat.Feature, *bufferLen) processWg := &sync.WaitGroup{} outputWg := &sync.WaitGroup{} if *threads < 2 { *threads = 2 } for i := 0; i < *threads-1; i++ { processWg.Add(1) go processServer(intervalTree, process, buffer, processWg) } //output server outputWg.Add(1) go func() { defer outputWg.Done() for feature := range buffer { out.Write(feature) } }() for { if feature, err := target.Read(); err == nil { process <- feature } else { close(process) break } } if store { enc := gob.NewEncoder(indexFile) if e := enc.Encode(intervalTree); e != nil { fmt.Fprintf(os.Stderr, "Error: %v.\n", e) os.Exit(0) } } processWg.Wait() close(buffer) outputWg.Wait() }
func (self Alignment) Stitch(f feat.FeatureSet) (a Alignment, err error) { for _, s := range self { if !s.Inplace && s.Quality != nil && s.Quality.Inplace { return nil, bio.NewError("Inplace operation on Quality with non-Inplace operation on parent Seq.", 0, s) } } t := interval.NewTree() var i *interval.Interval for _, feature := range f { if i, err = interval.New("", feature.Start, feature.End, 0, nil); err != nil { return nil, err } else { t.Insert(i) } } start := self.Start() a = make(Alignment, len(self)) span, err := interval.New("", start, self.End(), 0, nil) if err != nil { panic("Seq.End() < Seq.Start()") } fs, _ := t.Flatten(span, 0, 0) var offset int for i, s := range self { if s.Inplace { s.Seq = s.stitch(fs) if s.Offset -= fs[0].Start(); offset < 0 { s.Offset = 0 } s.Circular = false if s.Quality != nil { var q *Quality if s.Quality.Inplace { q = s.Quality } else { q = &Quality{ID: s.Quality.ID} } q.Qual = s.Quality.stitch(fs) if q.Offset = s.Quality.Offset - fs[0].Start(); q.Offset < 0 { q.Offset = 0 } q.Circular = false s.Quality = q } a[i] = s } else { var q *Quality if s.Quality != nil { if offset = s.Quality.Offset - fs[0].Start(); offset < 0 { offset = 0 } q = &Quality{ ID: s.Quality.ID, Qual: s.Quality.stitch(fs), Offset: offset, Circular: false, } } if offset = s.Offset - fs[0].Start(); offset < 0 { offset = 0 } a[i] = &Seq{ ID: s.ID, Seq: s.stitch(fs), Offset: offset, Strand: s.Strand, Circular: false, Moltype: s.Moltype, Quality: q, } } } return }