Esempio n. 1
0
// processServer is a worker that annotates the features received on queue and
// forwards them on output. It runs until queue is closed and drained, and
// signals wg on return.
//
// For each feature the index is queried for intervals intersecting it by at
// least minOverlap (taken as a fraction of the feature's length). Matches are
// kept in a heap over the Overlap ordering that is bounded to maxAnnotations
// entries, then sorted with the Start ordering and rendered by makeAnnotation
// into an ` ; Annot "..."` attribute that is appended to feature.Attributes.
// A feature without matches receives a map of mapLength '-' characters.
//
// Features for which interval.New fails are reported on stderr and are not
// forwarded to output.
func processServer(index interval.Tree, queue, output chan *feat.Feature, wg *sync.WaitGroup) {
	defer wg.Done()

	const prefix = ` ; Annot "`
	var (
		buffer      = make([]byte, 0, annotationLength)
		annotations = make(Matches, 0, maxAnnotations+1)
		o           = &Overlap{&annotations} // heap view over annotations, reused for every feature
		blank       = prefix + strings.Repeat("-", mapLength)
	)

	for feature := range queue {
		query, err := interval.New(string(feature.Location), feature.Start, feature.End, 0, nil)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Feature has end < start: %v\n", feature)
			continue
		}

		// Reuse the scratch storage; an empty slice is trivially a valid heap,
		// so no heap.Init is needed after truncating.
		annotations = annotations[:0]
		buffer = append(buffer[:0], blank...)

		overlap := int(float64(feature.Len()) * minOverlap)
		if results := index.Intersect(query, overlap); results != nil {
			for hit := range results {
				// heap.Push/heap.Pop (rather than the Push/Pop methods, which
				// only append to and truncate the slice) keep the heap ordered,
				// so the element evicted when the bound is exceeded is the heap
				// minimum instead of whichever match arrived last.
				// NOTE(review): assumes Overlap.Less ranks smaller overlaps
				// first, so the largest overlaps are retained - confirm.
				heap.Push(o, Match{
					Interval: hit,
					Overlap:  util.Min(hit.End(), query.End()) - util.Max(hit.Start(), query.Start()),
					Strand:   feature.Strand,
				})
				if len(annotations) > maxAnnotations {
					heap.Pop(o)
				}
			}
		}

		if len(annotations) > 0 {
			sort.Sort(&Start{&annotations})
			buffer = makeAnnotation(feature, annotations, len(prefix), buffer)
		}

		buffer = append(buffer, '"')
		feature.Attributes += string(buffer)
		output <- feature
	}
}
Esempio n. 2
0
// main reports, for every region in a BED file, summary statistics of the
// motifs (read from a second BED file) that lie within that region.
//
// The motif features are loaded into an interval tree. Each region is then
// looked up with Within and one tab-separated line is written to stdout
// holding the region's location, start and end, the number of motifs found,
// the mean and sample standard deviation of the motif midpoints, and the mean
// absolute distance of the motif midpoints from the region midpoint. With
// -verbose the regions and the motifs found in them are also listed on stderr.
//
// The program exits with status 1 on a usage error or when an input file
// cannot be opened.
func main() {
	motifName := flag.String("motif", "", "Filename for motif file.")
	regionName := flag.String("region", "", "Filename for region file.")
	verbose := flag.Bool("verbose", false, "Print details of identified motifs to stderr.")
	headerLine := flag.Bool("header", false, "Print a header line.")
	help := flag.Bool("help", false, "Print this usage message.")

	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: %s -motif <motif file> -region <region file>\n", os.Args[0])
		flag.PrintDefaults()
	}

	flag.Parse()

	if *help || *regionName == "" || *motifName == "" {
		flag.Usage()
		os.Exit(1)
	}

	// Open files. A failure here is fatal and must be visible to the caller,
	// hence the non-zero exit status.
	motif, err := bed.NewReaderName(*motifName, 3)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v.\n", err)
		os.Exit(1)
	}
	fmt.Fprintf(os.Stderr, "Reading motif features from `%s'.\n", *motifName)
	defer motif.Close()

	region, err := bed.NewReaderName(*regionName, 3)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v.\n", err)
		os.Exit(1)
	}
	fmt.Fprintf(os.Stderr, "Reading region features from `%s'.\n", *regionName)
	defer region.Close()

	// Read in motif features and build interval tree to search.
	// NOTE(review): any error from Read ends the loop silently; presumably
	// this is the end-of-file condition - confirm against bed.Reader.
	intervalTree := interval.NewTree()
	for line := 1; ; line++ {
		motifLine, err := motif.Read()
		if err != nil {
			break
		}
		motifInterval, err := interval.New(string(motifLine.Location), motifLine.Start, motifLine.End, 0, nil)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Line: %d: Feature has end < start: %v\n", line, motifLine)
			continue
		}
		intervalTree.Insert(motifInterval)
	}

	// Read in region features and search for motifs within each region.
	// Calculate the mean motif location, the sample standard deviation of the
	// locations and the mean distance of the motifs from the midpoint of the
	// region. Report these and the number of motifs within the region.
	if *headerLine {
		fmt.Println("Chromosome\tStart\tEnd\tn-hits\tMeanHitPos\tStddevHitPos\tMeanMidDistance")
	}
	for line := 1; ; line++ {
		regionLine, err := region.Read()
		if err != nil {
			break
		}
		regionMidPoint := float64(regionLine.Start+regionLine.End) / 2
		regionInterval, err := interval.New(string(regionLine.Location), regionLine.Start, regionLine.End, 0, regionMidPoint)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Line: %d: Feature has end < start: %v\n", line, regionLine)
			continue
		}
		if *verbose {
			fmt.Fprintf(os.Stderr, "%s\t%d\t%d\n", regionLine.Location, regionLine.Start, regionLine.End)
		}

		sumOfDiffs, sumOfSquares, mean, oldmean, n := 0., 0., 0., 0., 0.
		for intersector := range intervalTree.Within(regionInterval, 0) {
			motifMidPoint := float64(intersector.Start()+intersector.End()) / 2
			if *verbose {
				fmt.Fprintf(os.Stderr, "\t%s\t%d\t%d\n", intersector.Chromosome(), intersector.Start(), intersector.End())
			}

			// The Method of Provisional Means: update the running mean and
			// the sum of squared deviations in a single pass.
			n++
			mean = oldmean + (motifMidPoint-oldmean)/n
			sumOfSquares += (motifMidPoint - oldmean) * (motifMidPoint - mean)
			oldmean = mean

			sumOfDiffs += math.Abs(motifMidPoint - regionMidPoint)
		}

		// The divisions by n-1 and n degenerate for regions with fewer than
		// two hits, so the last two columns are not meaningful there.
		fmt.Printf("%s\t%d\t%d\t%0.f\t%0.f\t%f\t%f\n",
			regionLine.Location, regionLine.Start, regionLine.End,
			n, mean, math.Sqrt(sumOfSquares/(n-1)), sumOfDiffs/n)
	}
}
Esempio n. 3
0
// main annotates the features of a target GFF stream with the features of a
// source GFF file that overlap them.
//
// The source features are parsed into RepeatRecords and stored in an interval
// tree. Target features (from -target, or stdin when omitted) are handed to
// threads-1 processServer workers (at least one), whose results are written
// by a single output goroutine to -out (or stdout when omitted). When both
// -index and -source are given, the tree is additionally gob-encoded into the
// index file once all output has been written.
//
// The program exits with status 1 on a usage error or on any fatal error.
func main() {
	var (
		target    *gff.Reader
		source    *gff.Reader
		out       *gff.Writer
		indexFile *os.File
		e         error
		store     bool
	)

	targetName := flag.String("target", "", "Filename for input to be annotated. Defaults to stdin.")
	sourceName := flag.String("source", "", "Filename for source annotation.")
	indexName := flag.String("index", "", "Filename for index cache.")
	outName := flag.String("out", "", "Filename for output. Defaults to stdout.")
	flag.Float64Var(&minOverlap, "overlap", 0.05, "Overlap between features.")
	threads := flag.Int("threads", 2, "Number of threads to use.")
	bufferLen := flag.Int("buffer", 1000, "Length of ouput buffer.")
	help := flag.Bool("help", false, "Print this usage message.")

	flag.Parse()

	runtime.GOMAXPROCS(*threads)
	fmt.Fprintf(os.Stderr, "Using %d threads.\n", runtime.GOMAXPROCS(0))

	if *help || *sourceName == "" {
		flag.Usage()
		os.Exit(1)
	}

	if *targetName == "" {
		fmt.Fprintln(os.Stderr, "Reading PALS features from stdin.")
		target = gff.NewReader(os.Stdin)
	} else {
		if target, e = gff.NewReaderName(*targetName); e != nil {
			fmt.Fprintf(os.Stderr, "Error: %v.\n", e)
			os.Exit(1)
		}
		fmt.Fprintf(os.Stderr, "Reading target features from `%s'.\n", *targetName)
	}
	defer target.Close()

	// NOTE(review): the usage check above already rejects an empty -source, so
	// the two cases with *sourceName == "" cannot currently be reached; the
	// index file opened for reading is also never decoded in this function.
	switch {
	case *indexName == "" && *sourceName == "":
		fmt.Fprintln(os.Stderr, "No source or index provided.")
		os.Exit(1)
	case *indexName != "" && *sourceName == "":
		if indexFile, e = os.Open(*indexName); e != nil {
			fmt.Fprintf(os.Stderr, "Error: %v.\n", e)
			os.Exit(1)
		}
		defer indexFile.Close()
		store = false
	case *indexName != "" && *sourceName != "":
		if indexFile, e = os.Create(*indexName); e != nil {
			fmt.Fprintf(os.Stderr, "Error: %v.\n", e)
			os.Exit(1)
		}
		defer indexFile.Close()
		store = true
		fallthrough
	case *indexName == "" && *sourceName != "":
		if source, e = gff.NewReaderName(*sourceName); e != nil {
			fmt.Fprintf(os.Stderr, "Error: %v.\n", e)
			os.Exit(1)
		}
		fmt.Fprintf(os.Stderr, "Reading annotation features from `%s'.\n", *sourceName)
		defer source.Close()
	}

	if *outName == "" {
		fmt.Fprintln(os.Stderr, "Writing annotation to stdout.")
		out = gff.NewWriter(os.Stdout, 2, 60, false)
	} else {
		// Without a writer nothing useful can be done; carrying on would
		// dereference a nil out below.
		if out, e = gff.NewWriterName(*outName, 2, 60, true); e != nil {
			fmt.Fprintf(os.Stderr, "Error: %v.\n", e)
			os.Exit(1)
		}
		fmt.Fprintf(os.Stderr, "Writing annotation to `%s'.\n", *outName)
	}
	defer out.Close()

	// Build the interval tree from the source annotation.
	// NOTE(review): any error from Read ends the loop silently; presumably
	// this is the end-of-file condition - confirm against gff.Reader.
	intervalTree := interval.NewTree()
	for count := 0; ; count++ {
		repeat, err := source.Read()
		if err != nil {
			break
		}
		fmt.Fprintf(os.Stderr, "Line: %d\r", count)
		repData := &RepeatRecord{}
		repData.Parse(repeat.Attributes)
		repInterval, err := interval.New(string(repeat.Location), repeat.Start, repeat.End, 0, *repData)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Feature has end < start: %v\n", repeat)
			continue
		}
		intervalTree.Insert(repInterval)
	}

	process := make(chan *feat.Feature)
	buffer := make(chan *feat.Feature, *bufferLen)
	processWg := &sync.WaitGroup{}
	outputWg := &sync.WaitGroup{}

	// One goroutine is reserved for output; always run at least one worker.
	workers := *threads - 1
	if workers < 1 {
		workers = 1
	}
	for i := 0; i < workers; i++ {
		processWg.Add(1)
		go processServer(intervalTree, process, buffer, processWg)
	}

	// Output server: the single writer draining the workers' results.
	outputWg.Add(1)
	go func() {
		defer outputWg.Done()
		for feature := range buffer {
			out.Write(feature)
		}
	}()

	// Feed the workers until the target is exhausted.
	for {
		feature, err := target.Read()
		if err != nil {
			break
		}
		process <- feature
	}
	close(process)

	// Let every queued feature reach the writer before doing anything that
	// may terminate the process.
	processWg.Wait()
	close(buffer)
	outputWg.Wait()

	if store {
		if e := gob.NewEncoder(indexFile).Encode(intervalTree); e != nil {
			fmt.Fprintf(os.Stderr, "Error: %v.\n", e)
			// os.Exit does not run deferred calls, so close the output
			// explicitly to avoid losing annotations already produced.
			out.Close()
			os.Exit(1)
		}
	}
}
Esempio n. 4
0
// Stitch returns an alignment in which every sequence (and its Quality, when
// present) is reduced to the segments obtained by flattening the features of
// f over the span self.Start()..self.End().
//
// Sequences marked Inplace are modified and reused in the result; for all
// others a new Seq is created. A Quality is modified in place only when both
// it and its parent Seq are Inplace. Offsets are shifted by the start of the
// first flattened segment and clamped at zero, and Circular is cleared on
// every resulting Seq and Quality.
//
// An error is returned if a sequence is not Inplace while its Quality is, if
// a feature of f cannot be expressed as an interval, or if flattening yields
// no segment for a non-empty alignment. Stitch panics if the alignment's end
// precedes its start.
func (self Alignment) Stitch(f feat.FeatureSet) (a Alignment, err error) {
	for _, s := range self {
		if !s.Inplace && s.Quality != nil && s.Quality.Inplace {
			return nil, bio.NewError("Inplace operation on Quality with non-Inplace operation on parent Seq.", 0, s)
		}
	}

	t := interval.NewTree()
	var iv *interval.Interval
	for _, feature := range f {
		if iv, err = interval.New("", feature.Start, feature.End, 0, nil); err != nil {
			return nil, err
		}
		t.Insert(iv)
	}

	a = make(Alignment, len(self))
	span, err := interval.New("", self.Start(), self.End(), 0, nil)
	if err != nil {
		panic("Seq.End() < Seq.Start()")
	}
	fs, _ := t.Flatten(span, 0, 0)
	if len(self) > 0 && len(fs) == 0 {
		// Every offset below is taken relative to fs[0]; without a segment
		// there is nothing to stitch.
		return nil, bio.NewError("No features overlap the alignment.", 0, f)
	}

	// rebase expresses offset relative to the first stitched segment,
	// clamping at zero.
	rebase := func(offset int) int {
		if offset -= fs[0].Start(); offset < 0 {
			return 0
		}
		return offset
	}

	for j, s := range self {
		if s.Inplace {
			s.Seq = s.stitch(fs)
			// The clamp has to look at the shifted s.Offset itself, not at
			// an unrelated local, or negative offsets slip through.
			s.Offset = rebase(s.Offset)
			s.Circular = false
			if s.Quality != nil {
				q := s.Quality
				if !q.Inplace {
					q = &Quality{ID: s.Quality.ID}
				}
				q.Qual = s.Quality.stitch(fs)
				q.Offset = rebase(s.Quality.Offset)
				q.Circular = false
				s.Quality = q
			}
			a[j] = s
			continue
		}

		var q *Quality
		if s.Quality != nil {
			q = &Quality{
				ID:       s.Quality.ID,
				Qual:     s.Quality.stitch(fs),
				Offset:   rebase(s.Quality.Offset),
				Circular: false,
			}
		}
		a[j] = &Seq{
			ID:       s.ID,
			Seq:      s.stitch(fs),
			Offset:   rebase(s.Offset),
			Strand:   s.Strand,
			Circular: false,
			Moltype:  s.Moltype,
			Quality:  q,
		}
	}

	return a, nil
}