func (g *FASTAInfo) Stream(pasta_stream *bufio.Reader, out *bufio.Writer) error { var ch byte var e error var msg pasta.ControlMessage curStreamState := pasta.BEG _ = curStreamState for { ch, e = pasta_stream.ReadByte() for (e == nil) && ((ch == '\n') || (ch == ' ') || (ch == '\r') || (ch == '\t')) { ch, e = pasta_stream.ReadByte() } if e != nil { break } if ch == '>' { msg, e = pasta.ControlMessageProcess(pasta_stream) if e != nil { return fmt.Errorf(fmt.Sprintf("invalid control message %v (%v)", msg, e)) } if (msg.Type == pasta.REF) || (msg.Type == pasta.NOC) { curStreamState = pasta.MSG_REF_NOC } else if msg.Type == pasta.CHROM { curStreamState = pasta.MSG_CHROM } else if msg.Type == pasta.POS { curStreamState = pasta.MSG_POS } else { //just ignore continue //return fmt.Errorf("invalid message type") } } if g.Allele == 0 { alt_ch, ok := pasta.AltMap[ch] if ok { g.WriteFASTAByte(_tolch(alt_ch), out) } } else { ref_ch, ok := pasta.RefMap[ch] if ok { g.WriteFASTAByte(_tolch(ref_ch), out) } } } out.Flush() if e != io.EOF { return e } return nil }
func pasta_filter(pasta_stream *bufio.Reader, out *bufio.Writer, start, n int) error { var msg pasta.ControlMessage var pasta_stream_pos int var dbp int _ = dbp var curStreamState int _ = curStreamState var pasta_ref_pos int _ = pasta_ref_pos ref_pos := 0 message_processed_flag := false for { //var ch1 byte ch, e := pasta_stream.ReadByte() for (e == nil) && ((ch == '\n') || (ch == ' ') || (ch == '\r') || (ch == '\t')) { ch, e = pasta_stream.ReadByte() } if e != nil { break } if ch == '>' { msg, e = pasta.ControlMessageProcess(pasta_stream) if e != nil { return fmt.Errorf("invalid control message") } if (msg.Type == pasta.REF) || (msg.Type == pasta.NOC) { curStreamState = pasta.MSG } else if msg.Type == pasta.POS { ref_pos = msg.RefPos } else { //ignore // continue } pasta.ControlMessagePrint(&msg, out) message_processed_flag = true continue } if message_processed_flag { out.WriteByte('\n') } message_processed_flag = false pasta_stream_pos++ // special case: nop // if ch == '.' { continue } dbp = pasta.RefDelBP[ch] //anch_bp := ch is_del := false _ = is_del is_ins := false _ = is_ins is_ref := false _ = is_ref is_noc := false _ = is_noc if ch == '!' || ch == '$' || ch == '7' || ch == 'E' || ch == 'z' { is_del = true } else if ch == 'Q' || ch == 'S' || ch == 'W' || ch == 'd' || ch == 'Z' { is_ins = true } else if ch == 'a' || ch == 'c' || ch == 'g' || ch == 't' { is_ref = true } else if ch == 'n' || ch == 'N' || ch == 'A' || ch == 'C' || ch == 'G' || ch == 'T' { is_noc = true } if (ref_pos >= start) && (ref_pos < (start + n)) { out.WriteByte(ch) } // Add to reference sequence // for { if is_ins { break } ref_pos++ break } } return nil }
func interleave_filter(pasta_stream *bufio.Reader, out *bufio.Writer, start, n int) error { var msg pasta.ControlMessage var e error var e0 error var pasta_stream0_pos, pasta_stream1_pos int var dbp0, dbp1 int _, _ = dbp0, dbp1 var curStreamState int _ = curStreamState var pasta_ref_pos int _ = pasta_ref_pos bp_count := 0 lfmod := 50 _ = lfmod ref_pos := 0 ch := [2]byte{} message_processed_flag := false for { //var ch1 byte var e1 error ch[0], e0 = pasta_stream.ReadByte() for (e0 == nil) && ((ch[0] == '\n') || (ch[0] == ' ') || (ch[0] == '\r') || (ch[0] == '\t')) { ch[0], e0 = pasta_stream.ReadByte() } if e0 != nil { break } if ch[0] == '>' { msg, e = pasta.ControlMessageProcess(pasta_stream) if e != nil { return fmt.Errorf("invalid control message") } if (msg.Type == pasta.REF) || (msg.Type == pasta.NOC) { curStreamState = pasta.MSG } else if msg.Type == pasta.POS { ref_pos = msg.RefPos } pasta.ControlMessagePrint(&msg, out) message_processed_flag = true continue } if message_processed_flag { out.WriteByte('\n') } message_processed_flag = false ch[1], e1 = pasta_stream.ReadByte() for (e1 == nil) && ((ch[1] == '\n') || (ch[1] == ' ') || (ch[1] == '\r') || (ch[1] == '\t')) { ch[1], e1 = pasta_stream.ReadByte() } if e1 != nil { break } pasta_stream0_pos++ pasta_stream1_pos++ // special case: nop // if ch[0] == '.' && ch[1] == '.' { continue } dbp0 = pasta.RefDelBP[ch[0]] dbp1 = pasta.RefDelBP[ch[1]] anch_bp := ch[0] if anch_bp == '.' { anch_bp = ch[1] } is_del := []bool{false, false} is_ins := []bool{false, false} is_ref := []bool{false, false} _ = is_ref is_noc := []bool{false, false} _ = is_noc for aa := 0; aa < 2; aa++ { if ch[aa] == '!' || ch[aa] == '$' || ch[aa] == '7' || ch[aa] == 'E' || ch[aa] == 'z' { is_del[aa] = true } else if ch[aa] == 'Q' || ch[aa] == 'S' || ch[aa] == 'W' || ch[aa] == 'd' || ch[aa] == 'Z' { is_ins[aa] = true } else if ch[aa] == 'a' || ch[aa] == 'c' || ch[aa] == 'g' || ch[aa] == 't' { is_ref[aa] = true } else if ch[aa] == 'n' || ch[aa] == 'N' || ch[aa] == 'A' || ch[aa] == 'C' || ch[aa] == 'G' || ch[aa] == 'T' { is_noc[aa] = true } } if (is_ins[0] && (!is_ins[1] && ch[1] != '.')) || (is_ins[1] && (!is_ins[0] && ch[0] != '.')) { return fmt.Errorf(fmt.Sprintf("insertion mismatch (ch %c,%c ord(%v,%v) @ %v)", ch[0], ch[1], ch[0], ch[1], bp_count)) } if (ref_pos >= start) && (ref_pos < (start + n)) { if ref_pos == start { out.WriteString(fmt.Sprintf(">P{%d}\n", ref_pos)) } out.WriteByte(ch[0]) out.WriteByte(ch[1]) } // Add to reference sequence // for { if is_ins[0] || is_ins[1] { break } ref_pos++ break } } return nil }
// Read from an interleaved stream and print out a simplified variant difference format // // Each token from the stream should be interleaved and aligned. Each token can be processed // two at a time, where the first token is from the first stream and the second is from // the second stream. The resulting difference format spits out contigs of ref, non-ref and // alts where appropriate. // // The 'process' callback will be called for every variant line that gets processed. // func interleave_to_diff_iface(stream *bufio.Reader, p RefVarPrinter, w io.Writer) error { alt0 := []byte{} alt1 := []byte{} refseq := []byte{} ref_start := 0 ref0_len := 0 ref1_len := 0 stream0_pos := 0 stream1_pos := 0 info := RefVarInfo{} //info := GVCFVarInfo{} info.Type = pasta.BEG info.MessageType = pasta.BEG info.RefSeqFlag = gFullRefSeqFlag info.NocSeqFlag = gFullNocSeqFlag info.Out = os.Stdout info.Chrom = "unk" out := bufio.NewWriter(w) var bp_anchor_ref byte var bp_anchor_prv byte curStreamState := pasta.BEG _ = curStreamState prvStreamState := pasta.BEG _ = prvStreamState var msg pasta.ControlMessage var prev_msg pasta.ControlMessage var e error var ch1 byte var e1 error var dbp0 int var dbp1 int for { is_ref0 := false is_ref1 := false is_noc0 := false is_noc1 := false message_processed_flag := false ch0, e0 := stream.ReadByte() for (e0 == nil) && ((ch0 == '\n') || (ch0 == ' ') || (ch0 == '\r') || (ch0 == '\t')) { ch0, e0 = stream.ReadByte() } if e0 != nil { break } if ch0 == '>' { msg, e = pasta.ControlMessageProcess(stream) if e != nil { return fmt.Errorf(fmt.Sprintf("invalid control message %v (%v)", msg, e)) } if (msg.Type == pasta.REF) || (msg.Type == pasta.NOC) { curStreamState = pasta.MSG_REF_NOC } else if msg.Type == pasta.CHROM { curStreamState = pasta.MSG_CHROM } else if msg.Type == pasta.POS { curStreamState = pasta.MSG_POS } else { //just ignore continue //return fmt.Errorf("invalid message type") } message_processed_flag = true } if !message_processed_flag { ch1, e1 = stream.ReadByte() for (e1 == nil) && ((ch1 == '\n') || (ch1 == ' ') || (ch1 == '\r') || (ch1 == '\t')) { ch1, e1 = stream.ReadByte() } if e1 != nil { break } stream0_pos++ stream1_pos++ // special case: nop // if ch0 == '.' && ch1 == '.' { continue } dbp0 = pasta.RefDelBP[ch0] dbp1 = pasta.RefDelBP[ch1] if ch0 == 'a' || ch0 == 'c' || ch0 == 'g' || ch0 == 't' { is_ref0 = true } else if ch0 == 'n' || ch0 == 'N' || ch0 == 'A' || ch0 == 'C' || ch0 == 'G' || ch0 == 'T' { is_noc0 = true } if ch1 == 'a' || ch1 == 'c' || ch1 == 'g' || ch1 == 't' { is_ref1 = true } else if ch1 == 'n' || ch1 == 'N' || ch1 == 'A' || ch1 == 'C' || ch1 == 'G' || ch1 == 'T' { is_noc1 = true } if is_ref0 && is_ref1 { curStreamState = pasta.REF } else if is_noc0 || is_noc1 { curStreamState = pasta.NOC } else { curStreamState = pasta.ALT } } if curStreamState == pasta.BEG { if !is_ref0 || !is_ref1 { if bp, ok := pasta.RefMap[ch0]; ok { refseq = append(refseq, bp) bp_anchor_ref = bp } else if bp, ok := pasta.RefMap[ch1]; ok { refseq = append(refseq, bp) bp_anchor_ref = bp } } else if gFullRefSeqFlag { if bp, ok := pasta.RefMap[ch0]; ok { refseq = append(refseq, bp) bp_anchor_ref = bp } else if bp, ok := pasta.RefMap[ch1]; ok { refseq = append(refseq, bp) bp_anchor_ref = bp } } ref0_len += dbp0 ref1_len += dbp1 if bp_val, ok := pasta.AltMap[ch0]; ok { alt0 = append(alt0, bp_val) } if bp_val, ok := pasta.AltMap[ch1]; ok { alt1 = append(alt1, bp_val) } prvStreamState = curStreamState prev_msg = msg continue } if !message_processed_flag { if is_ref0 && is_ref1 && ch0 != ch1 { return fmt.Errorf(fmt.Sprintf("ERROR: stream position (%d,%d), stream0 token %c (%d), stream1 token %c (%d)", stream0_pos, stream1_pos, ch0, ch0, ch1, ch1)) } } if (prvStreamState == pasta.REF) && (curStreamState != pasta.REF) { info.RefBP = bp_anchor_ref e := p.Print(prvStreamState, ref_start, ref0_len, refseq, nil, out) if e != nil { return e } // Save the last ref BP in case the ALT is an indel. // bp_anchor_prv = '-' if len(refseq) > 0 { bp_anchor_prv = refseq[len(refseq)-1] } ref_start += ref0_len ref0_len = 0 ref1_len = 0 alt0 = alt0[0:0] alt1 = alt1[0:0] refseq = refseq[0:0] } else if (prvStreamState == pasta.NOC) && (curStreamState != pasta.NOC) { full_noc_flag := gFullNocSeqFlag for ii := 0; ii < len(alt0); ii++ { if alt0[ii] != 'n' { full_noc_flag = true break } } if full_noc_flag { for ii := 0; ii < len(alt1); ii++ { if alt1[ii] != 'n' { full_noc_flag = true break } } } a0 := string(alt0) if len(a0) == 0 { a0 = "-" } a1 := string(alt1) if len(a1) == 0 { a1 = "-" } r := string(refseq) if len(r) == 0 { r = "-" } info.RefBP = bp_anchor_ref info.NocSeqFlag = full_noc_flag e := p.Print(prvStreamState, ref_start, ref0_len, []byte(r), [][]byte{[]byte(a0), []byte(a1)}, out) if e != nil { return e } // Save the last ref BP in case the ALT is an indel. // bp_anchor_prv = '-' if len(refseq) > 0 { bp_anchor_prv = refseq[len(refseq)-1] } ref_start += ref0_len ref0_len = 0 ref1_len = 0 alt0 = alt0[0:0] alt1 = alt1[0:0] refseq = refseq[0:0] } else if (prvStreamState == pasta.ALT) && ((curStreamState == pasta.REF) || (curStreamState == pasta.NOC)) { a0 := string(alt0) if len(a0) == 0 { a0 = "-" } a1 := string(alt1) if len(a1) == 0 { a1 = "-" } r := string(refseq) if len(r) == 0 { r = "-" } info.RefBP = bp_anchor_prv e := p.Print(prvStreamState, ref_start, ref0_len, []byte(r), [][]byte{[]byte(a0), []byte(a1)}, out) if e != nil { return e } ref_start += ref0_len ref0_len = 0 ref1_len = 0 alt0 = alt0[0:0] alt1 = alt1[0:0] refseq = refseq[0:0] } else if prvStreamState == pasta.MSG_REF_NOC { info.Msg = prev_msg info.RefBP = bp_anchor_ref e := p.Print(prvStreamState, ref_start, prev_msg.N, refseq, nil, out) if e != nil { return e } ref_start += prev_msg.N stream0_pos += prev_msg.N stream1_pos += prev_msg.N ref0_len = 0 ref1_len = 0 alt0 = alt0[0:0] alt1 = alt1[0:0] refseq = refseq[0:0] } else if prvStreamState == pasta.MSG_CHROM { info.Chrom = prev_msg.Chrom p.Chrom(prev_msg.Chrom) } else if prvStreamState == pasta.MSG_POS { ref_start = prev_msg.RefPos } else { // The current state matches the previous state. // Either both the current tokens are non-ref as well as the previous tokens // or both the current token and previous tokens are ref. } if !message_processed_flag { if bp_val, ok := pasta.AltMap[ch0]; ok { alt0 = append(alt0, bp_val) } if bp_val, ok := pasta.AltMap[ch1]; ok { alt1 = append(alt1, bp_val) } if !is_ref0 || !is_ref1 { if bp, ok := pasta.RefMap[ch0]; ok { refseq = append(refseq, bp) if ref0_len == 0 { bp_anchor_ref = bp } } else if bp, ok := pasta.RefMap[ch1]; ok { refseq = append(refseq, bp) if ref0_len == 0 { bp_anchor_ref = bp } } } else if gFullRefSeqFlag { if bp, ok := pasta.RefMap[ch0]; ok { refseq = append(refseq, bp) if ref0_len == 0 { bp_anchor_ref = bp } } else if bp, ok := pasta.RefMap[ch1]; ok { refseq = append(refseq, bp) if ref0_len == 0 { bp_anchor_ref = bp } } } else if ref0_len == 0 { if bp, ok := pasta.RefMap[ch0]; ok { if ref0_len == 0 { bp_anchor_ref = bp } } else if bp, ok := pasta.RefMap[ch1]; ok { if ref0_len == 0 { bp_anchor_ref = bp } } } ref0_len += dbp0 ref1_len += dbp1 } prvStreamState = curStreamState prev_msg = msg } if prvStreamState == pasta.REF { info.RefBP = bp_anchor_ref e := p.Print(prvStreamState, ref_start, ref0_len, refseq, [][]byte{alt0, alt1}, out) if e != nil { return e } } else if prvStreamState == pasta.NOC { full_noc_flag := gFullNocSeqFlag for ii := 0; ii < len(alt0); ii++ { if alt0[ii] != 'n' { full_noc_flag = true break } } if full_noc_flag { for ii := 0; ii < len(alt1); ii++ { if alt1[ii] != 'n' { full_noc_flag = true break } } } info.NocSeqFlag = full_noc_flag info.RefBP = bp_anchor_ref e := p.Print(prvStreamState, ref_start, ref0_len, refseq, [][]byte{alt0, alt1}, out) if e != nil { return e } } else if prvStreamState == pasta.ALT { a0 := string(alt0) if len(a0) == 0 { a0 = "-" } a1 := string(alt1) if len(a1) == 0 { a1 = "-" } r := string(refseq) if len(r) == 0 { r = "-" } e := p.Print(prvStreamState, ref_start, ref0_len, []byte(r), [][]byte{[]byte(a0), []byte(a1)}, out) if e != nil { return e } } else if prvStreamState == pasta.MSG_REF_NOC { info.Msg = prev_msg info.RefBP = bp_anchor_ref e := p.Print(prvStreamState, ref_start, prev_msg.N, nil, nil, out) if e != nil { return e } } else if prvStreamState == pasta.MSG_CHROM { info.Chrom = prev_msg.Chrom p.Chrom(prev_msg.Chrom) } p.PrintEnd(out) out.Flush() return nil }
// Take in a FastJ stream and a reference stream to produce a PASTA stream. // Assumes each variant 'class' is ordered. // func (g *FastJInfo) Pasta(fastj_stream *bufio.Reader, ref_stream *bufio.Reader, assembly_stream *bufio.Reader, out *bufio.Writer) error { var err error g.LFMod = 50 for ii := 0; ii < 256; ii++ { memz.Score['n'][ii] = 0 memz.Score[ii]['n'] = 0 } ref_pos := g.RefPos ref_seq := make([]byte, 0, 1024) alt_seq := make([][]byte, 2) alt_seq[0] = make([]byte, 0, 1024) alt_seq[1] = make([]byte, 0, 1024) tile_len := make([]int, 2) is_eof := false cur_path := make([]int, 2) _ = cur_path cur_step := make([]int, 2) _ = cur_step cur_var := 0 // For spanning tiles we need to skip the // tag at the beginning. This holds the // number of bases we need to skip. // skip_prefix := make([]int, 2) skip_prefix[0] = 0 skip_prefix[1] = 0 knot_len := make([]int, 2) knot_len[0] = 0 knot_len[1] = 0 for { line, e := fastj_stream.ReadBytes('\n') if e != nil { err = e if e == io.EOF { is_eof = true } break } if len(line) == 0 { continue } if line[0] == '\n' { continue } // Beginning of a header line means we can emit the previous tile information. // if line[0] == '>' { if tile_len[0] == tile_len[1] { if len(ref_seq) > 24 { n := len(ref_seq) - 24 n0 := len(alt_seq[0]) - 24 n1 := len(alt_seq[1]) - 24 if n >= 24 { g.EmitAlignedInterleave(ref_seq[:n], alt_seq[0][:n0], alt_seq[1][:n1], out) } else { return fmt.Errorf("sanity error, no tag") } } tile_len[0] = 0 tile_len[1] = 0 skip_prefix[0] = 0 skip_prefix[1] = 0 knot_len[0] = 0 knot_len[1] = 0 for aa := 0; aa < 2; aa++ { n := len(alt_seq[aa]) if n > 24 { alt_seq[aa] = alt_seq[aa][0:0] } else { alt_seq[aa] = alt_seq[aa][0:0] } } n := len(ref_seq) if n > 24 { ref_seq = ref_seq[n-24:] } else { ref_seq = ref_seq[0:0] } } sj, e := sloppyjson.Loads(string(line[1:])) if e != nil { return fmt.Errorf(fmt.Sprintf("error parsing JSON header: %v", e)) } p, _, s, v, e := parse_tile(sj.O["tileID"].S) if e != nil { return fmt.Errorf(fmt.Sprintf("error parsing tileID: %v", e)) } _ = p _ = s stl := int(sj.O["seedTileLength"].P) tile_len[v] += stl skip_prefix[v] = 0 if knot_len[v] > 0 { skip_prefix[v] = 24 } knot_len[v]++ cur_var = v // Read up to current assembly position in reference and // assembly streams. // if cur_var == 0 { for ii := 0; ii < stl; ii++ { // Advance the next refere position end, reading as many // spanning tiles as we need to (reading 'stl' (seedTileLength) // as many entries from the assembly stream). // e = g.ReadAssembly(assembly_stream) if e != nil { return fmt.Errorf(fmt.Sprintf("ERROR reading assembly at ref_pos %d: %v", ref_pos, e)) } for { if ref_pos >= g.AssemblyEndPos { break } ref_ch, e := ref_stream.ReadByte() if e != nil { return fmt.Errorf(fmt.Sprintf("error reading reference stream (ref_pos %d, AssemblyEndPos %d): %v", ref_pos, g.AssemblyEndPos, e)) } if ref_ch == '\n' || ref_ch == ' ' || ref_ch == '\t' || ref_ch == '\r' { continue } if ref_ch == '>' { msg, e := pasta.ControlMessageProcess(ref_stream) if e != nil { return fmt.Errorf(fmt.Sprintf("error processing control message: %v", e)) } if msg.Type == pasta.POS { ref_pos = msg.RefPos } continue } ref_seq = append(ref_seq, ref_ch) ref_pos++ } if ref_pos != g.AssemblyEndPos { return fmt.Errorf("reference position mismatch") } } } continue } line = bytes.Trim(line, " \t\n") if tile_len[cur_var] == 0 { alt_seq[cur_var] = append(alt_seq[cur_var], line...) } else { // Skip the appropriate bases if this is // part of a knot. // min_pfx := skip_prefix[cur_var] if min_pfx > len(line) { min_pfx = len(line) } alt_seq[cur_var] = append(alt_seq[cur_var], line[min_pfx:]...) // Update bases to skip // skip_prefix[cur_var] -= min_pfx } } if !is_eof { return fmt.Errorf(fmt.Sprintf("non EOF state after stream processed: %v", err)) } // Take care of final tiles // if tile_len[0] == tile_len[1] { if len(ref_seq) >= 24 { g.EmitAlignedInterleave(ref_seq, alt_seq[0], alt_seq[1], out) } else { return fmt.Errorf("sanity, no tag") } } else { return fmt.Errorf("tile position mismatch") } out.WriteByte('\n') out.Flush() return nil }
func (g *FastJInfo) Convert(pasta_stream *bufio.Reader, tag_stream *bufio.Reader, assembly_stream *bufio.Reader, out *bufio.Writer) error { var msg pasta.ControlMessage var e error var pasta_stream0_pos, pasta_stream1_pos int var dbp0, dbp1 int _, _ = dbp0, dbp1 var curStreamState int _ = curStreamState ref_seq := make([]byte, 0, 1024) alt_seq := make([][]byte, 2) alt_seq[0] = make([]byte, 0, 1024) alt_seq[1] = make([]byte, 0, 1024) seed_tile_length := make([]int, 2) seed_tile_length[0] = 1 seed_tile_length[1] = 1 step_pos := make([]int, 2) step_pos[0] = 0 step_pos[1] = 0 lfmod := 50 _ = lfmod ref_pos := g.RefPos e = g.ReadAssembly(assembly_stream) if e != nil { return e } message_processed_flag := false _ = message_processed_flag for { var ch1 byte var e1 error ch0, e0 := pasta_stream.ReadByte() for (e0 == nil) && ((ch0 == '\n') || (ch0 == ' ') || (ch0 == '\r') || (ch0 == '\t')) { ch0, e0 = pasta_stream.ReadByte() } if e0 != nil { break } if ch0 == '>' { msg, e = pasta.ControlMessageProcess(pasta_stream) if e != nil { return fmt.Errorf("invalid control message") } if (msg.Type == pasta.REF) || (msg.Type == pasta.NOC) { curStreamState = pasta.MSG } else { //ignore // continue } message_processed_flag = true continue } for ref_pos > g.AssemblyEndPos { e = g.ReadAssembly(assembly_stream) if e != nil { return e } } // emit tiles // if ref_pos == g.AssemblyEndPos { end_tile_flag := false if !g.TagFinished { e = g.ReadTag(tag_stream) if e != nil { return fmt.Errorf(fmt.Sprintf("ERROR reading tag: %v", e)) } } else { end_tile_flag = true } s_epos := 24 if s_epos > len(alt_seq[0]) { s_epos = len(alt_seq[0]) } e_spos := len(alt_seq[0]) - 24 if e_spos < 0 { e_spos = 0 } if end_tile_flag || g.EndTagMatch(alt_seq[0]) { start_tile_flag := false beg_tag := "" idx_end := len(g.EndTagBuffer) - 1 if end_tile_flag { idx := idx_end - seed_tile_length[0] + 1 //if idx_end>=0 { if idx >= 0 { //beg_tag = g.EndTagBuffer[idx_end] beg_tag = g.EndTagBuffer[idx] } } else if (idx_end - seed_tile_length[0]) >= 0 { beg_tag = g.EndTagBuffer[idx_end-seed_tile_length[0]] } else { start_tile_flag = true } end_tag := "" if !end_tile_flag { end_tag = g.EndTagBuffer[idx_end] } d_beg := -24 if start_tile_flag { d_beg = 0 } out.WriteString(fmt.Sprintf(`>{"tileID":"%04x.%02x.%04x.%03x"`, g.TagPath, g.LibraryVersion, step_pos[0], 0)) out.WriteString(fmt.Sprintf(`,"md5sum":"%s"`, _m5sum_str(alt_seq[0]))) out.WriteString(fmt.Sprintf(`,"tagmask_md5sum":"%s"`, _m5sum_tagmask_str(alt_seq[0], beg_tag, end_tag))) out.WriteString(fmt.Sprintf(`,"locus":[{"build":"%s %s %d %d"}]`, g.RefBuild, g.Chrom, g.AssemblyPrevEndPos+d_beg, g.AssemblyEndPos)) out.WriteString(fmt.Sprintf(`,"n":%d`, len(alt_seq[0]))) out.WriteString(fmt.Sprintf(`,"seedTileLength":%d`, seed_tile_length[0])) out.WriteString(fmt.Sprintf(`,"startTile":%s`, _tf_val(start_tile_flag))) out.WriteString(fmt.Sprintf(`,"endTile":%s`, _tf_val(end_tile_flag))) out.WriteString(fmt.Sprintf(`,"startSeq":"%s","endSeq":"%s"`, alt_seq[0][0:s_epos], alt_seq[0][e_spos:])) out.WriteString(fmt.Sprintf(`,"startTag":"%s"`, beg_tag)) out.WriteString(fmt.Sprintf(`,"endTag":"%s"`, end_tag)) out.WriteString(fmt.Sprintf(`,"nocallCount":%d`, _noc_count(alt_seq[0]))) out.WriteString(fmt.Sprintf(`,"notes":[]`)) out.WriteString(fmt.Sprintf("}\n")) g.WriteFastJSeq(alt_seq[0], out) out.WriteByte('\n') // Update sequence // if len(alt_seq[0]) >= 24 { n := len(alt_seq[0]) alt_seq[0] = alt_seq[0][n-24:] } step_pos[0] += seed_tile_length[0] seed_tile_length[0] = 1 } else { seed_tile_length[0]++ } //---- s_epos = 24 if s_epos > len(alt_seq[1]) { s_epos = len(alt_seq[1]) } e_spos = len(alt_seq[1]) - 24 if e_spos < 0 { e_spos = 1 } if end_tile_flag || g.EndTagMatch(alt_seq[1]) { start_tile_flag := false beg_tag := "" idx_end := len(g.EndTagBuffer) - 1 if end_tile_flag { idx := idx_end - seed_tile_length[1] + 1 //if idx_end>=0 { if idx >= 0 { //beg_tag = g.EndTagBuffer[idx_end] beg_tag = g.EndTagBuffer[idx] } } else if (idx_end - seed_tile_length[1]) >= 0 { beg_tag = g.EndTagBuffer[idx_end-seed_tile_length[1]] } else { start_tile_flag = true } end_tag := "" if !end_tile_flag { end_tag = g.EndTagBuffer[idx_end] } d_beg := -24 if start_tile_flag { d_beg = 0 } out.WriteString(fmt.Sprintf(`>{"tileID":"%04x.%02x.%04x.%03x"`, g.TagPath, g.LibraryVersion, step_pos[1], 1)) out.WriteString(fmt.Sprintf(`,"md5sum":"%s"`, _m5sum_str(alt_seq[1]))) out.WriteString(fmt.Sprintf(`,"tagmask_md5sum":"%s"`, _m5sum_tagmask_str(alt_seq[1], beg_tag, end_tag))) out.WriteString(fmt.Sprintf(`,"locus":[{"build":"%s %s %d %d"}]`, g.RefBuild, g.Chrom, g.AssemblyPrevEndPos+d_beg, g.AssemblyEndPos)) out.WriteString(fmt.Sprintf(`,"n":%d`, len(alt_seq[1]))) out.WriteString(fmt.Sprintf(`,"seedTileLength":%d`, seed_tile_length[1])) out.WriteString(fmt.Sprintf(`,"startTile":%s`, _tf_val(start_tile_flag))) out.WriteString(fmt.Sprintf(`,"endTile":%s`, _tf_val(end_tile_flag))) out.WriteString(fmt.Sprintf(`,"startSeq":"%s","endSeq":"%s"`, alt_seq[1][0:s_epos], alt_seq[1][e_spos:])) out.WriteString(fmt.Sprintf(`,"startTag":"%s"`, beg_tag)) out.WriteString(fmt.Sprintf(`,"endTag":"%s"`, end_tag)) out.WriteString(fmt.Sprintf(`,"nocallCount":%d`, _noc_count(alt_seq[1]))) out.WriteString(fmt.Sprintf(`,"notes":[ ]`)) out.WriteString(fmt.Sprintf("}\n")) g.WriteFastJSeq(alt_seq[1], out) out.WriteByte('\n') // Update sequence // if len(alt_seq[1]) >= 24 { n := len(alt_seq[1]) alt_seq[1] = alt_seq[1][n-24:] } step_pos[1] += seed_tile_length[1] seed_tile_length[1] = 1 } else { seed_tile_length[1]++ } if len(ref_seq) >= 24 { n := len(ref_seq) ref_seq = ref_seq[n-24:] } e = g.ReadAssembly(assembly_stream) if e != nil { return fmt.Errorf(fmt.Sprintf("ERROR reading assembly: %v", e)) } } message_processed_flag = false ch1, e1 = pasta_stream.ReadByte() for (e1 == nil) && ((ch1 == '\n') || (ch1 == ' ') || (ch1 == '\r') || (ch1 == '\t')) { ch1, e1 = pasta_stream.ReadByte() } if e1 != nil { break } pasta_stream0_pos++ pasta_stream1_pos++ // special case: nop // if ch0 == '.' && ch1 == '.' { continue } dbp0 = pasta.RefDelBP[ch0] dbp1 = pasta.RefDelBP[ch1] anch_bp := ch0 if anch_bp == '.' { anch_bp = ch1 } is_del := []bool{false, false} is_ins := []bool{false, false} is_ref := []bool{false, false} _ = is_ref is_noc := []bool{false, false} _ = is_noc if ch0 == '!' || ch0 == '$' || ch0 == '7' || ch0 == 'E' || ch0 == 'z' { is_del[0] = true } else if ch0 == 'Q' || ch0 == 'S' || ch0 == 'W' || ch0 == 'd' || ch0 == 'Z' { is_ins[0] = true } else if ch0 == 'a' || ch0 == 'c' || ch0 == 'g' || ch0 == 't' { is_ref[0] = true } else if ch0 == 'n' || ch0 == 'N' || ch0 == 'A' || ch0 == 'C' || ch0 == 'G' || ch0 == 'T' { is_noc[0] = true } if ch1 == '!' || ch1 == '$' || ch1 == '7' || ch1 == 'E' || ch1 == 'z' { is_del[1] = true } else if ch1 == 'Q' || ch1 == 'S' || ch1 == 'W' || ch1 == 'd' || ch1 == 'Z' { is_ins[1] = true } else if ch1 == 'a' || ch1 == 'c' || ch1 == 'g' || ch1 == 't' { is_ref[1] = true } else if ch1 == 'n' || ch1 == 'N' || ch1 == 'A' || ch1 == 'C' || ch1 == 'G' || ch1 == 'T' { is_noc[1] = true } if (is_ins[0] && (!is_ins[1] && ch1 != '.')) || (is_ins[1] && (!is_ins[0] && ch0 != '.')) { return fmt.Errorf(fmt.Sprintf("insertion mismatch (ch %c,%c ord(%v,%v) @ %v)", ch0, ch1, ch0, ch1, ref_pos)) } // Add to reference sequence // for { if is_ins[0] || is_ins[1] { break } if ch1 == '.' { ref_seq = append(ref_seq, pasta.RefMap[ch0]) } else if ch0 == '.' { ref_seq = append(ref_seq, pasta.RefMap[ch1]) } else { ref_bp := pasta.RefMap[ch0] if ref_bp != pasta.RefMap[ch1] { return fmt.Errorf(fmt.Sprintf("PASTA reference bases do not match (%c != %c) at %d %d (refpos %d)\n", ref_bp, pasta.RefMap[ch1], pasta_stream0_pos, pasta_stream1_pos, ref_pos)) } ref_seq = append(ref_seq, ref_bp) } ref_pos++ break } // Alt sequences // for { if ch0 == '.' { break } if pasta.IsAltDel[ch0] { break } alt_seq[0] = append(alt_seq[0], pasta.AltMap[ch0]) break } for { if ch1 == '.' { break } if pasta.IsAltDel[ch1] { break } alt_seq[1] = append(alt_seq[1], pasta.AltMap[ch1]) break } } for ref_pos < g.AssemblyEndPos { for aa := 0; aa < 2; aa++ { alt_seq[aa] = append(alt_seq[aa], 'n') } ref_pos++ } // emit tiles // if ref_pos == g.AssemblyEndPos { // Emit final FastJ sequences // for aa := 0; aa < 2; aa++ { start_tile_flag := false beg_tag := "" idx_end := len(g.EndTagBuffer) - 1 if idx_end >= 0 { //beg_tag = g.EndTagBuffer[idx_end] idx := idx_end - seed_tile_length[aa] + 1 if idx >= 0 { beg_tag = g.EndTagBuffer[idx] } } else { start_tile_flag = true } // We're at the end of the path, so no end tag // end_tag := "" s_epos := 24 if s_epos > len(alt_seq[aa]) { s_epos = len(alt_seq[aa]) } e_spos := len(alt_seq[aa]) - 24 if e_spos < 0 { e_spos = 1 } out.WriteString(fmt.Sprintf(`>{"tileID":"%04x.%02x.%04x.%03x"`, g.TagPath, g.LibraryVersion, step_pos[aa], aa)) out.WriteString(fmt.Sprintf(`,"md5sum":"%s"`, _m5sum_str(alt_seq[aa]))) out.WriteString(fmt.Sprintf(`,"tagmask_md5sum":"%s"`, _m5sum_tagmask_str(alt_seq[aa], beg_tag, end_tag))) out.WriteString(fmt.Sprintf(`,"locus":[{"build":"%s %s %d %d"}]`, g.RefBuild, g.Chrom, g.AssemblyPrevEndPos, g.AssemblyEndPos)) out.WriteString(fmt.Sprintf(`,"n":%d`, len(alt_seq[aa]))) out.WriteString(fmt.Sprintf(`,"seedTileLength":%d`, seed_tile_length[aa])) out.WriteString(fmt.Sprintf(`,"startTile":%s`, _tf_val(start_tile_flag))) out.WriteString(fmt.Sprintf(`,"endTile":%s`, _tf_val(true))) out.WriteString(fmt.Sprintf(`,"startSeq":"%s","endSeq":"%s"`, alt_seq[aa][0:s_epos], alt_seq[aa][e_spos:])) out.WriteString(fmt.Sprintf(`,"startTag":"%s"`, beg_tag)) out.WriteString(fmt.Sprintf(`,"endTag":"%s"`, end_tag)) out.WriteString(fmt.Sprintf(`,"nocallCount":%d`, _noc_count(alt_seq[aa]))) out.WriteString(fmt.Sprintf(`,"notes":[ ]`)) out.WriteString(fmt.Sprintf("}\n")) g.WriteFastJSeq(alt_seq[aa], out) out.WriteByte('\n') } } out.WriteByte('\n') out.Flush() return nil }
func interleave_to_haploid(stream *bufio.Reader, ind int) error { var msg pasta.ControlMessage var e error var stream0_pos, stream1_pos int var dbp0, dbp1 int _, _ = dbp0, dbp1 var curStreamState int _ = curStreamState out := bufio.NewWriter(os.Stdout) bp_count := 0 lfmod := 50 for { message_processed_flag := false var ch1 byte var e1 error ch0, e0 := stream.ReadByte() for (e0 == nil) && ((ch0 == '\n') || (ch0 == ' ') || (ch0 == '\r') || (ch0 == '\t')) { ch0, e0 = stream.ReadByte() } if e0 != nil { break } if ch0 == '>' { msg, e = pasta.ControlMessageProcess(stream) if e != nil { return fmt.Errorf("invalid control message") } if (msg.Type == pasta.REF) || (msg.Type == pasta.NOC) { curStreamState = pasta.MSG } else { //ignore continue //return fmt.Errorf("invalid message type") } message_processed_flag = true continue } if !message_processed_flag { ch1, e1 = stream.ReadByte() for (e1 == nil) && ((ch1 == '\n') || (ch1 == ' ') || (ch1 == '\r') || (ch1 == '\t')) { ch1, e1 = stream.ReadByte() } if e1 != nil { break } stream0_pos++ stream1_pos++ // special case: nop // if ch0 == '.' && ch1 == '.' { continue } dbp0 = pasta.RefDelBP[ch0] dbp1 = pasta.RefDelBP[ch1] anch_bp := ch0 if anch_bp == '.' { anch_bp = ch1 } is_del := []bool{false, false} is_ins := []bool{false, false} is_ref := []bool{false, false} _ = is_ref is_noc := []bool{false, false} _ = is_noc if ch0 == '!' || ch0 == '$' || ch0 == '7' || ch0 == 'E' || ch0 == 'z' { is_del[0] = true } else if ch0 == 'Q' || ch0 == 'S' || ch0 == 'W' || ch0 == 'd' || ch0 == 'Z' { is_ins[0] = true } else if ch0 == 'a' || ch0 == 'c' || ch0 == 'g' || ch0 == 't' { is_ref[0] = true } else if ch0 == 'n' || ch0 == 'N' || ch0 == 'A' || ch0 == 'C' || ch0 == 'G' || ch0 == 'T' { is_noc[0] = true } if ch1 == '!' || ch1 == '$' || ch1 == '7' || ch1 == 'E' || ch1 == 'z' { is_del[1] = true } else if ch1 == 'Q' || ch1 == 'S' || ch1 == 'W' || ch1 == 'd' || ch1 == 'Z' { is_ins[1] = true } else if ch1 == 'a' || ch1 == 'c' || ch1 == 'g' || ch1 == 't' { is_ref[1] = true } else if ch1 == 'n' || ch1 == 'N' || ch1 == 'A' || ch1 == 'C' || ch1 == 'G' || ch1 == 'T' { is_noc[1] = true } if (is_ins[0] && (!is_ins[1] && ch1 != '.')) || (is_ins[1] && (!is_ins[0] && ch0 != '.')) { out.Flush() return fmt.Errorf(fmt.Sprintf("interleave_to_haploid: insertion mismatch (ch %c,%c ord(%v,%v) @ %v)", ch0, ch1, ch0, ch1, bp_count)) } if ind == -1 { // ref if is_ins[0] || is_ins[1] { continue } if ch0 != '.' { och, ok := pasta.RefMap[ch0] if !ok { return fmt.Errorf("interleave_to_haploid: no character found in stream0 RefMap for %c ord(%d) @ %d", ch0, ch0, bp_count) } out.WriteByte(och) } else { och, ok := pasta.RefMap[ch1] if !ok { return fmt.Errorf("interleave_to_haploid: no character found in stream1 RefMap for %c ord(%d) @ %d", ch1, ch1, bp_count) } out.WriteByte(och) } bp_count++ if (lfmod > 0) && ((bp_count % lfmod) == 0) { out.WriteByte('\n') } } else if ind == 0 { // alt0 if ch0 == '.' { continue } if pasta.IsAltDel[ch0] { continue } och, ok := pasta.AltMap[ch0] if !ok { return fmt.Errorf("interleave_to_haploid: no character found in stream0 AltMap for %c ord(%d) @ %d", ch0, ch0, bp_count) } out.WriteByte(och) bp_count++ if (lfmod > 0) && ((bp_count % lfmod) == 0) { out.WriteByte('\n') } } else if ind == 1 { // alt1 if ch1 == '.' { continue } if pasta.IsAltDel[ch1] { continue } och, ok := pasta.AltMap[ch1] if !ok { return fmt.Errorf("interleave_to_haploid: no character found in stream0 AltMap for %c ord(%d) @ %d", ch1, ch1, bp_count) } out.WriteByte(och) bp_count++ if (lfmod > 0) && ((bp_count % lfmod) == 0) { out.WriteByte('\n') } } } } out.WriteByte('\n') out.Flush() return nil }
func pasta_to_haploid(stream *bufio.Reader, ind int) error { var msg pasta.ControlMessage var e error var stream0_pos int var dbp0 int _ = dbp0 var curStreamState int _ = curStreamState out := bufio.NewWriter(os.Stdout) bp_count := 0 lfmod := 50 for { message_processed_flag := false ch0, e0 := stream.ReadByte() for (e0 == nil) && ((ch0 == '\n') || (ch0 == ' ') || (ch0 == '\r') || (ch0 == '\t')) { ch0, e0 = stream.ReadByte() } if e0 != nil { break } if ch0 == '>' { msg, e = pasta.ControlMessageProcess(stream) if e != nil { return fmt.Errorf("invalid control message") } if (msg.Type == pasta.REF) || (msg.Type == pasta.NOC) { curStreamState = pasta.MSG } else { //ignore continue } message_processed_flag = true continue } if !message_processed_flag { stream0_pos++ // special case: nop // if ch0 == '.' { continue } is_del := false _ = is_del is_ins := false _ = is_ins is_ref := false _ = is_ref is_noc := false _ = is_noc if ch0 == '!' || ch0 == '$' || ch0 == '7' || ch0 == 'E' || ch0 == 'z' { is_del = true } else if ch0 == 'Q' || ch0 == 'S' || ch0 == 'W' || ch0 == 'd' || ch0 == 'Z' { is_ins = true } else if ch0 == 'a' || ch0 == 'c' || ch0 == 'g' || ch0 == 't' { is_ref = true } else if ch0 == 'n' || ch0 == 'N' || ch0 == 'A' || ch0 == 'C' || ch0 == 'G' || ch0 == 'T' { is_noc = true } dbp0 = pasta.RefDelBP[ch0] if ind == -1 { // ref if is_ins { continue } if ch0 != '.' { out.WriteByte(pasta.RefMap[ch0]) } bp_count++ if (lfmod > 0) && ((bp_count % lfmod) == 0) { out.WriteByte('\n') } } else if ind == 0 { // alt0 if ch0 == '.' { continue } if pasta.IsAltDel[ch0] { continue } out.WriteByte(pasta.AltMap[ch0]) bp_count++ if (lfmod > 0) && ((bp_count % lfmod) == 0) { out.WriteByte('\n') } } } } out.WriteByte('\n') out.Flush() return nil }