Пример #1
0
// emit_nocall_ref consumes n bytes from the reference stream and writes each
// one to aout, upper-casing a/c/g/t and passing every other byte through
// unchanged. The package-level gCounter is incremented once per byte.
//
// It returns the position following the last consumed byte (start_pos
// unchanged when n <= 0). If refilling the reference stream fails, it returns
// 0 together with that error.
func emit_nocall_ref(start_pos int64, n int64, ref_ain *simplestream.SimpleStream, aout *bufio.Writer) (int64, error) {
	pos := start_pos
	for end := start_pos + n; pos < end; pos++ {
		// Refill the stream buffer once it has been fully consumed.
		if ref_ain.Pos >= ref_ain.N {
			if err := ref_ain.Refresh(); err != nil {
				return 0, err
			}
		}

		base := ref_ain.Buf[ref_ain.Pos]
		ref_ain.Pos++

		out := base
		switch base {
		case 'a':
			out = 'A'
		case 'c':
			out = 'C'
		case 'g':
			out = 'G'
		case 't':
			out = 'T'
		}
		aout.WriteByte(out)

		gCounter++
	}

	return pos, nil
}
Пример #2
0
// convert reads pa_ain byte by byte and writes a translated byte to aout
// through a buffered writer that is flushed on return.
//
// Each input byte belonging to one of the groups below is replaced by the
// group's output byte; bytes outside every group are dropped:
//
//	= # * ' a Q  ->  a
//	~ & + " c S  ->  c
//	? : - , g W  ->  g
//	@ ; % _ t d  ->  t
//	A C G T n N  ->  n
//
// The loop only ends when refilling pa_ain fails, and that error is returned.
func convert(pa_ain *simplestream.SimpleStream, aout *os.File) error {
	type group struct {
		out byte
		in  string
	}

	// table[b] holds the output for input byte b, or 0 when b is dropped.
	var table [256]byte
	for _, g := range []group{
		{'a', "=#*'aQ"},
		{'c', "~&+\"cS"},
		{'g', "?:-,gW"},
		{'t', "@;%_td"},
		{'n', "ACGTnN"},
	} {
		for i := 0; i < len(g.in); i++ {
			table[g.in[i]] = g.out
		}
	}

	w := bufio.NewWriter(aout)
	defer w.Flush()

	for {
		if pa_ain.Pos >= pa_ain.N {
			if err := pa_ain.Refresh(); err != nil {
				return err
			}
		}

		in := pa_ain.Buf[pa_ain.Pos]
		pa_ain.Pos++

		if out := table[in]; out != 0 {
			w.WriteByte(out)
		}
	}
}
Пример #3
0
// emit_alt skips n bytes of the reference stream and then writes alt_seq to
// aout.
//
// The first n bytes of alt_seq are written with a/c/g/t upper-cased. Any
// bytes of alt_seq beyond the first n are written with a different code:
// a->b, c->d, g->h, t->u (case-insensitive on input). Bytes other than
// a/c/g/t are written unchanged in both cases.
//
// It returns the position following the skipped reference bytes. If refilling
// the reference stream fails, it returns 0 together with that error.
func emit_alt(start_pos int64, n int64, alt_seq string, ref_ain *simplestream.SimpleStream, aout *bufio.Writer) (int64, error) {
	// Advance the reference stream past the replaced span without reading it.
	pos := start_pos
	for end := start_pos + n; pos < end; pos++ {
		if ref_ain.Pos >= ref_ain.N {
			if err := ref_ain.Refresh(); err != nil {
				return 0, err
			}
		}
		ref_ain.Pos++
	}

	for i := 0; i < len(alt_seq); i++ {
		ch := alt_seq[i]

		// within: code used inside the first n bytes; beyond: code used after.
		within, beyond := ch, ch
		switch ch {
		case 'a', 'A':
			within, beyond = 'A', 'b'
		case 'c', 'C':
			within, beyond = 'C', 'd'
		case 'g', 'G':
			within, beyond = 'G', 'h'
		case 't', 'T':
			within, beyond = 'T', 'u'
		}

		if int64(i) < n {
			aout.WriteByte(within)
		} else {
			aout.WriteByte(beyond)
		}
	}

	return pos, nil
}
Пример #4
0
// peel_ref consumes n bytes from the reference stream and returns them as a
// string along with the position following the last consumed byte.
//
// If refilling the reference stream fails, it returns "", 0 and that error.
func peel_ref(start_pos int64, n int64, ref_ain *simplestream.SimpleStream) (string, int64, error) {
	var collected []byte

	pos := start_pos
	for end := start_pos + n; pos < end; pos++ {
		// Refill the stream buffer once it has been fully consumed.
		if ref_ain.Pos >= ref_ain.N {
			if err := ref_ain.Refresh(); err != nil {
				return "", 0, err
			}
		}

		collected = append(collected, ref_ain.Buf[ref_ain.Pos])
		ref_ain.Pos++
	}

	return string(collected), pos, nil
}
Пример #5
0
// emit_nocall consumes n bytes from the reference stream and writes a code
// for each one to aout: a/c/g/t (either case) become 'A', 'C', 'G', 'T' and
// n/N becomes 'n'.
//
// Any other byte is treated as fatal: a diagnostic line is printed to stdout
// and the function panics with the offending byte. An error is returned only
// when refilling the reference stream fails.
func emit_nocall(start_pos int64, n int64, ref_ain *simplestream.SimpleStream, aout *bufio.Writer) error {
	for pos, end := start_pos, start_pos+n; pos < end; pos++ {
		if ref_ain.Pos >= ref_ain.N {
			if err := ref_ain.Refresh(); err != nil {
				return err
			}
		}

		base := ref_ain.Buf[ref_ain.Pos]
		ref_ain.Pos++

		var code byte
		switch base {
		case 'a', 'A':
			code = 'A'
		case 'c', 'C':
			code = 'C'
		case 'g', 'G':
			code = 'G'
		case 't', 'T':
			code = 'T'
		case 'n', 'N':
			code = 'n'
		default:
			// Anything outside acgtn is unexpected in the reference stream.
			fmt.Printf("!!!! %c ... s%d, n%d\n", base, pos, n)
			panic(base)
		}
		aout.WriteByte(code)
	}

	return nil
}
Пример #6
0
// _main is the command-line action: it opens the input, the reference stream
// and the output named by the flags on c, applies the profiling and
// GOMAXPROCS options, and runs convert.
//
// Flags read: "input" (required), "ref-input" and "output" ("-" selects
// stdin/stdout respectively), "ref-start", "allele", "pprof"/"pprof-file",
// "mprof"/"mprof-file", "Verbose" and "max-procs".
//
// Setup failures are reported on stderr and terminate the process with a
// non-zero status. A convert error other than io.EOF panics.
func _main(c *cli.Context) {
	if c.String("input") == "" {
		fmt.Fprintf(os.Stderr, "Input required, exiting\n")
		cli.ShowAppHelp(c)
		os.Exit(1)
	}

	gff_ain, err := autoio.OpenReadScannerSimple(c.String("input"))
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v", err)
		os.Exit(1)
	}
	defer gff_ain.Close()

	ref_ain := simplestream.SimpleStream{}
	ref_fp := os.Stdin
	if c.String("ref-input") != "-" {
		ref_fp, err = os.Open(c.String("ref-input"))
		if err != nil {
			// Report the error returned by this Open call.
			fmt.Fprintf(os.Stderr, "%v", err)
			os.Exit(1)
		}
		defer ref_fp.Close()
	}
	ref_ain.Init(ref_fp)

	var ref_start int64
	if ss := c.Int("ref-start"); ss > 0 {
		ref_start = int64(ss)
	}

	aout := os.Stdout
	if c.String("output") != "-" {
		// os.Create opens the file for writing (creating or truncating it);
		// os.Open would return a read-only handle and every write would fail.
		aout, err = os.Create(c.String("output"))
		if err != nil {
			fmt.Fprintf(os.Stderr, "%v", err)
			os.Exit(1)
		}
		defer aout.Close()
	}

	if c.Bool("pprof") {
		gProfileFlag = true
		gProfileFile = c.String("pprof-file")
	}

	if c.Bool("mprof") {
		gMemProfileFlag = true
		gMemProfileFile = c.String("mprof-file")
	}

	gVerboseFlag = c.Bool("Verbose")

	if c.Int("max-procs") > 0 {
		runtime.GOMAXPROCS(c.Int("max-procs"))
	}

	allele := c.Int("allele")

	if gProfileFlag {
		prof_f, err := os.Create(gProfileFile)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Could not open profile file %s: %v\n", gProfileFile, err)
			os.Exit(2)
		}
		defer prof_f.Close()

		if err := pprof.StartCPUProfile(prof_f); err != nil {
			fmt.Fprintf(os.Stderr, "Could not start CPU profile: %v\n", err)
			os.Exit(2)
		}
		// Deferred after Close, so the profile is stopped (and written)
		// before the file is closed.
		defer pprof.StopCPUProfile()
	}

	// io.EOF signals normal exhaustion of the input streams.
	e := convert(&gff_ain, &ref_ain, aout, ref_start, allele)
	if e != nil && e != io.EOF {
		panic(e)
	}

	aout.Sync()
}
Пример #7
0
// emit_alt encodes an alternate sequence against the next n bytes of the
// reference stream and writes the encoding to aout.
//
// For each of the first n bytes of alt_seq, one reference byte is consumed,
// verified against the package-level g_GFF_REF, and the pair is written as
// gSub[ref][alt] (a substitution). Bytes of alt_seq beyond the first n are
// written as insertion codes. If alt_seq is shorter than n, the remaining
// reference bytes are consumed afterwards.
//
// start_pos is used only in diagnostic output.
//
// An error is returned when refilling the reference stream fails or when a
// byte outside a/c/g/t/n (either case) is met in the reference or in alt_seq.
// The function panics when the reference byte disagrees with g_GFF_REF, or
// when both the reference and the alt byte are 'n'.
func emit_alt(start_pos int64, n int64, alt_seq string, ref_ain *simplestream.SimpleStream, aout *bufio.Writer) error {
	// ref_pos counts the reference bytes consumed so far and indexes g_GFF_REF.
	ref_pos := 0

	var ref_bp byte

	for i := 0; i < len(alt_seq); i++ {

		if int64(i) < n {
			if ref_ain.Pos >= ref_ain.N {
				if e := ref_ain.Refresh(); e != nil {
					return e
				}
			}
			ref_bp = ref_ain.Buf[ref_ain.Pos]

			// REFERENCE CHECK
			//
			// Compare the stream byte with the GFF-reported reference byte,
			// folding upper-case A/C/G/T to lower case on both sides first.
			// NOTE(review): g_GFF_REF is indexed without a bounds check;
			// presumably the caller guarantees len(g_GFF_REF) >= n -- confirm.
			lc_ref_bp := ref_bp
			lc_gff_bp := g_GFF_REF[ref_pos]

			if lc_ref_bp == 'A' {
				lc_ref_bp = 'a'
			} else if lc_ref_bp == 'C' {
				lc_ref_bp = 'c'
			} else if lc_ref_bp == 'G' {
				lc_ref_bp = 'g'
			} else if lc_ref_bp == 'T' {
				lc_ref_bp = 't'
			}

			if lc_gff_bp == 'A' {
				lc_gff_bp = 'a'
			} else if lc_gff_bp == 'C' {
				lc_gff_bp = 'c'
			} else if lc_gff_bp == 'G' {
				lc_gff_bp = 'g'
			} else if lc_gff_bp == 'T' {
				lc_gff_bp = 't'
			}

			if lc_ref_bp != lc_gff_bp {
				fmt.Printf("\nREF MISMATCH: GFF reported %c (at %d+%d) but got %c\n", g_GFF_REF[ref_pos], start_pos, ref_pos, ref_bp)
				panic("!!")
			}
			//
			// REFERENCE CHECK

			ref_ain.Pos++
			ref_pos++

			// Normalise the reference byte to lower case; it is the first
			// index into gSub below.
			switch ref_bp {
			case 'a', 'A':
				ref_bp = 'a'
			case 'c', 'C':
				ref_bp = 'c'
			case 'g', 'G':
				ref_bp = 'g'
			case 't', 'T':
				ref_bp = 't'
			case 'n', 'N':
				ref_bp = 'n'
			default:
				return fmt.Errorf("invalid character for reference stream ('%c') at %d", ref_bp, ref_pos)
			}

			// It's considered a sub
			//
			// A called alt base over an 'n' reference only produces a
			// diagnostic line; the panic is disabled. ref_bp is already
			// lower case here, so only the 'n' comparison can match.
			if (ref_bp == 'n' || ref_bp == 'N') && (alt_seq[i] != 'n' && alt_seq[i] != 'N') {
				fmt.Printf("\n\n>>>> start_pos %d, n %d, alt_seq %s, ref_bp %c, alt_seq[%d] %c\n\n", start_pos, n, alt_seq, ref_bp, i, alt_seq[i])
				//panic("whoa!!!")
			}

			// Write the substitution code selected by (reference, alt).
			switch alt_seq[i] {
			case 'a', 'A':
				aout.WriteByte(gSub[ref_bp]['a'])
			case 'c', 'C':
				aout.WriteByte(gSub[ref_bp]['c'])
			case 'g', 'G':
				aout.WriteByte(gSub[ref_bp]['g'])
			case 't', 'T':
				aout.WriteByte(gSub[ref_bp]['t'])
			case 'n', 'N':
				// An 'n' alt over an 'n' reference is treated as fatal.
				if ref_bp == 'n' || ref_bp == 'N' {
					fmt.Printf("WHOA@! %c (s%d,n%d) [%s]{%d,%c}\n", ref_bp, start_pos, n, alt_seq, i, alt_seq[i])
					panic("-->")
				}
				aout.WriteByte(gSub[ref_bp]['n'])
			default:
				return fmt.Errorf("invalid character for alt sequence ('%c') at pos %d", alt_seq[i], i)
			}

		} else {

			// It's considered an insertion
			//
			// No reference byte is consumed; the alt byte maps directly to
			// its insertion code.
			switch alt_seq[i] {
			case 'a', 'A':
				aout.WriteByte('Q')
			case 'c', 'C':
				aout.WriteByte('S')
			case 'g', 'G':
				aout.WriteByte('W')
			case 't', 'T':
				aout.WriteByte('d')
			case 'n', 'N':
				aout.WriteByte('^')
			default:
				return fmt.Errorf("invalid character for alt sequence ('%c') at pos. %d", alt_seq[i], i)
			}

		}

	}

	// Consume the reference bytes that alt_seq did not cover.
	// NOTE(review): each byte is mapped to a code ('!', '$', '7', 'E', 'z')
	// that is stored in ref_bp but never written to aout, so this loop only
	// advances the reference stream and validates its bytes. Presumably an
	// aout.WriteByte(ref_bp) was intended after the switch -- confirm.
	for ; int64(ref_pos) < n; ref_pos++ {
		if ref_ain.Pos >= ref_ain.N {
			if e := ref_ain.Refresh(); e != nil {
				return e
			}
		}
		ref_bp = ref_ain.Buf[ref_ain.Pos]
		ref_ain.Pos++

		switch ref_bp {
		case 'a', 'A':
			ref_bp = '!'
		case 'c', 'C':
			ref_bp = '$'
		case 'g', 'G':
			ref_bp = '7'
		case 't', 'T':
			ref_bp = 'E'
		case 'n', 'N':
			ref_bp = 'z'
		default:
			return fmt.Errorf("invalid character for reference stream ('%c') at %d", ref_bp, ref_pos)
		}

	}

	return nil

}
Пример #8
0
// convert reads fa_ain and ref_ain in lock step, one byte from each per
// iteration, and writes one encoded byte per pair to aout through a buffered
// writer that is flushed on return.
//
// Encoding of each (fa, ref) pair:
//   - fa is 'n' or 'N': the reference byte is written as a no-call code
//     (A->'!', C->'#', G->'\”, T->'4', case-insensitive); any other
//     reference byte is written unchanged.
//   - fa equals ref: upper-case A/C/G/T are written in lower case; any other
//     byte is written unchanged.
//   - fa differs from ref: a/c/g/t are written in upper case; any other byte
//     is written unchanged.
//
// start_pos is accepted for interface compatibility and is not used.
//
// The loop only ends when refilling one of the streams fails, and that error
// is returned.
func convert(fa_ain, ref_ain *simplestream.SimpleStream, aout *os.File, start_pos int64) error {
	// All output goes through bufout: *os.File has no WriteByte method, and
	// per-byte unbuffered writes would cost one syscall each.
	bufout := bufio.NewWriter(aout)
	defer bufout.Flush()

	for {
		if fa_ain.Pos >= fa_ain.N {
			if e := fa_ain.Refresh(); e != nil {
				return e
			}
		}

		if ref_ain.Pos >= ref_ain.N {
			if e := ref_ain.Refresh(); e != nil {
				return e
			}
		}

		fa_bp := fa_ain.Buf[fa_ain.Pos]
		fa_ain.Pos++

		ref_bp := ref_ain.Buf[ref_ain.Pos]
		ref_ain.Pos++

		switch {
		case fa_bp == 'n' || fa_bp == 'N':
			// No-call: encode which reference base was not called.
			switch ref_bp {
			case 'A', 'a':
				bufout.WriteByte('!')
			case 'C', 'c':
				bufout.WriteByte('#')
			case 'G', 'g':
				bufout.WriteByte('\'')
			case 'T', 't':
				bufout.WriteByte('4')
			default:
				bufout.WriteByte(ref_bp)
			}

		case fa_bp == ref_bp:
			// Match with the reference.
			switch fa_bp {
			case 'A':
				bufout.WriteByte('a')
			case 'C':
				bufout.WriteByte('c')
			case 'G':
				bufout.WriteByte('g')
			case 'T':
				bufout.WriteByte('t')
			default:
				bufout.WriteByte(fa_bp)
			}

		default:
			// Mismatch with the reference.
			switch fa_bp {
			case 'a', 'A':
				bufout.WriteByte('A')
			case 'c', 'C':
				bufout.WriteByte('C')
			case 'g', 'G':
				bufout.WriteByte('G')
			case 't', 'T':
				bufout.WriteByte('T')
			default:
				bufout.WriteByte(fa_bp)
			}
		}
	}
}
Пример #9
0
// _main is the command-line action: it opens the input stream and the output
// named by the flags on c, applies the profiling and GOMAXPROCS options, and
// runs convert.
//
// Flags read: "input" (required; "-" selects stdin), "output" ("-" selects
// stdout), "pprof"/"pprof-file", "mprof"/"mprof-file", "Verbose" and
// "max-procs".
//
// Failures are reported on stderr and terminate the process with a non-zero
// status.
func _main(c *cli.Context) {
	var err error

	input := c.String("input")
	if input == "" {
		fmt.Fprintf(os.Stderr, "Input required, exiting\n")
		cli.ShowAppHelp(c)
		os.Exit(1)
	}

	pasta_ain := simplestream.SimpleStream{}
	pasta_fp := os.Stdin
	if input != "-" {
		pasta_fp, err = os.Open(input)
		if err != nil {
			fmt.Fprintf(os.Stderr, "%v", err)
			os.Exit(1)
		}
		defer pasta_fp.Close()
	}
	pasta_ain.Init(pasta_fp)

	aout := os.Stdout
	if c.String("output") != "-" {
		// os.Create opens the file for writing (creating or truncating it);
		// os.Open would return a read-only handle and every write would fail.
		aout, err = os.Create(c.String("output"))
		if err != nil {
			fmt.Fprintf(os.Stderr, "%v", err)
			os.Exit(1)
		}
		defer aout.Close()
	}

	if c.Bool("pprof") {
		gProfileFlag = true
		gProfileFile = c.String("pprof-file")
	}

	if c.Bool("mprof") {
		gMemProfileFlag = true
		gMemProfileFile = c.String("mprof-file")
	}

	gVerboseFlag = c.Bool("Verbose")

	if c.Int("max-procs") > 0 {
		runtime.GOMAXPROCS(c.Int("max-procs"))
	}

	if gProfileFlag {
		prof_f, err := os.Create(gProfileFile)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Could not open profile file %s: %v\n", gProfileFile, err)
			os.Exit(2)
		}
		defer prof_f.Close()

		if err := pprof.StartCPUProfile(prof_f); err != nil {
			fmt.Fprintf(os.Stderr, "Could not start CPU profile: %v\n", err)
			os.Exit(2)
		}
		// Deferred after Close, so the profile is stopped (and written)
		// before the file is closed.
		defer pprof.StopCPUProfile()
	}

	// io.EOF signals normal exhaustion of the input; anything else is a
	// real failure that must not be dropped silently.
	if err = convert(&pasta_ain, aout); err != nil && err != io.EOF {
		fmt.Fprintf(os.Stderr, "%v\n", err)
		// os.Exit skips deferred calls, so finish the CPU profile first
		// (StopCPUProfile is a no-op when profiling is not active).
		pprof.StopCPUProfile()
		os.Exit(1)
	}
}
Пример #10
0
// convert reads an aligned reference stream and sequence stream in lock step,
// one byte from each per iteration, and writes one encoded byte per pair to
// fout through a buffered writer that is flushed on return.
//
// Encoding of each (ref, seq) pair:
//   - identical a/c/g/t (either case): gSub[x][x] for the lower-case base x.
//   - identical 'n'/'N': rejected with an error.
//   - seq is '-': a deletion code for the reference base
//     (a->'!', c->'$', g->'7', t->'E', n->'z').
//   - ref is '-': an insertion code for the sequence base
//     (a->'Q', c->'S', g->'W', t->'d', n->'f').
//   - anything else: gSub[ref][seq] with both bytes folded to lower case.
//
// Processing stops successfully when both streams yield a newline at the same
// position. start_pos and allele_num are accepted for interface compatibility
// and are not used.
//
// An error is returned when refilling either stream fails or when an
// unexpected byte is met. When g_convert_debug is set, trace lines are
// printed to stdout.
func convert(ref_ain *simplestream.SimpleStream, seq_ain *simplestream.SimpleStream, fout *os.File, start_pos int64, allele_num int) error {

	aout := bufio.NewWriter(fout)
	if g_convert_debug {
		defer func() { fmt.Printf("\n"); aout.Flush() }()
	} else {
		defer aout.Flush()
	}

	// icount/ocount are reported in error and debug messages.
	icount := 0
	ocount := 0

	//DEBUG
	if g_convert_debug {
		fmt.Printf("start\n")
	}

	for {

		//DEBUG
		if g_convert_debug {
			fmt.Printf("ref_ain.Pos %d, ref_ain.N %d\n", ref_ain.Pos, ref_ain.N)
		}

		if ref_ain.Pos >= ref_ain.N {
			if e := ref_ain.Refresh(); e != nil {
				return e
			}
		}
		bp_ref := ref_ain.Buf[ref_ain.Pos]
		ref_ain.Pos++

		//DEBUG
		if g_convert_debug {
			fmt.Printf("seq_ain.Pos %d, seq_ain.N %d\n", seq_ain.Pos, seq_ain.N)
		}

		if seq_ain.Pos >= seq_ain.N {
			if e := seq_ain.Refresh(); e != nil {
				return e
			}
		}
		bp_seq := seq_ain.Buf[seq_ain.Pos]
		seq_ain.Pos++

		//DEBUG
		if g_convert_debug {
			fmt.Printf("%d,%d bp_ref %c, bp_seq %c\n", icount, ocount, bp_ref, bp_seq)
		}

		// A newline in both streams marks the end of the aligned record.
		if bp_ref == bp_seq && bp_ref == '\n' {
			break
		}

		switch {
		case bp_ref == bp_seq:

			// gSub is indexed with lower-case keys, the same as in the
			// general branch below, so upper-case input is encoded
			// identically to lower-case input.
			switch bp_ref {
			case 'a', 'A':
				aout.WriteByte(gSub['a']['a'])
			case 'c', 'C':
				aout.WriteByte(gSub['c']['c'])
			case 'g', 'G':
				aout.WriteByte(gSub['g']['g'])
			case 't', 'T':
				aout.WriteByte(gSub['t']['t'])
			case 'n', 'N':
				return fmt.Errorf("no-call to no-call match: bp_ref:%c i:%d o:%d", bp_ref, icount, ocount)
			default:
				return fmt.Errorf("invalid character for alt sequence ('%c') at i:%d o:%d", bp_ref, icount, ocount)
			}

		case bp_seq == '-':

			// Deletion: the reference has a base the sequence lacks.
			switch bp_ref {
			case 'a', 'A':
				aout.WriteByte('!')
			case 'c', 'C':
				aout.WriteByte('$')
			case 'g', 'G':
				aout.WriteByte('7')
			case 't', 'T':
				aout.WriteByte('E')
			case 'n', 'N':
				aout.WriteByte('z')
			default:
				return fmt.Errorf("invalid character for alt sequence ('%c') at i:%d o:%d", bp_ref, icount, ocount)
			}

		case bp_ref == '-':

			// Insertion: the sequence has a base the reference lacks.
			switch bp_seq {
			case 'a', 'A':
				aout.WriteByte('Q')
			case 'c', 'C':
				aout.WriteByte('S')
			case 'g', 'G':
				aout.WriteByte('W')
			case 't', 'T':
				aout.WriteByte('d')
			case 'n', 'N':
				aout.WriteByte('f')
			default:
				// The offending byte here is the sequence byte.
				return fmt.Errorf("invalid character for alt sequence ('%c') at i:%d o:%d", bp_seq, icount, ocount)
			}

		default:

			// Substitution: fold every ASCII upper-case letter to lower
			// case before the table lookup (this must cover 'G', 'N' and
			// 'T', not just 'A'..'F').
			ref_lc := bp_ref
			if ref_lc >= 'A' && ref_lc <= 'Z' {
				ref_lc += 'a' - 'A'
			}

			seq_lc := bp_seq
			if seq_lc >= 'A' && seq_lc <= 'Z' {
				seq_lc += 'a' - 'A'
			}

			aout.WriteByte(gSub[ref_lc][seq_lc])
		}

		icount++
		ocount++

	}

	return nil

}