func _main_fasta_to_pasta(c *cli.Context) { var e error infn_slice := c.StringSlice("input") if len(infn_slice) < 1 { infn_slice = append(infn_slice, "-") } ain, err := autoio.OpenReadScanner(infn_slice[0]) if err != nil { fmt.Fprintf(os.Stderr, "%v", err) os.Stderr.Sync() os.Exit(1) } defer ain.Close() fp := os.Stdin if c.String("refstream") != "-" { fp, e = os.Open(c.String("refstream")) if e != nil { fmt.Fprintf(os.Stderr, "%v", e) os.Stderr.Sync() os.Exit(1) } defer fp.Close() } ref_stream := bufio.NewReader(fp) out := bufio.NewWriter(os.Stdout) fi := FASTAInfo{} fi.Init() fi.Allele = 0 line_no := 0 fi.PastaBegin(out) for ain.ReadScan() { fasta_line := ain.ReadText() line_no++ if len(fasta_line) == 0 || fasta_line == "" { continue } e := fi.Pasta(fasta_line, ref_stream, out) if e != nil { fmt.Fprintf(os.Stderr, "ERROR: %v at line %v\n", e, line_no) return } } fi.PastaEnd(out) }
func _main_gvcf_to_rotini(c *cli.Context) { var e error infn_slice := c.StringSlice("input") if len(infn_slice) < 1 { infn_slice = append(infn_slice, "-") } ain, err := autoio.OpenReadScanner(infn_slice[0]) if err != nil { fmt.Fprintf(os.Stderr, "%v", err) os.Stderr.Sync() os.Exit(1) } defer ain.Close() fp := os.Stdin if c.String("refstream") != "-" { fp, e = os.Open(c.String("refstream")) if e != nil { fmt.Fprintf(os.Stderr, "%v", e) os.Stderr.Sync() os.Exit(1) } defer fp.Close() } ref_stream := bufio.NewReader(fp) out := bufio.NewWriter(os.Stdout) g := gvcf.GVCFRefVar{} g.Init() line_no := 0 g.PastaBegin(out) for ain.ReadScan() { gvcf_line := ain.ReadText() line_no++ if len(gvcf_line) == 0 || gvcf_line == "" { continue } e := g.Pasta(gvcf_line, ref_stream, out) if e != nil { fmt.Fprintf(os.Stderr, "ERROR: %v at line %v\n", e, line_no) return } } g.PastaEnd(out) out.Flush() }
func _main_diff_to_rotini(c *cli.Context) { infn_slice := c.StringSlice("input") if len(infn_slice) < 1 { infn_slice = append(infn_slice, "-") } ain, err := autoio.OpenReadScanner(infn_slice[0]) if err != nil { fmt.Fprintf(os.Stderr, "%v", err) os.Stderr.Sync() os.Exit(1) } defer ain.Close() diff_to_interleave(&ain) }
func _main(c *cli.Context) { gShowKnotNocallInfoFlag = !c.Bool("hide-knot-low-quality") inp_slice := c.StringSlice("input") cglf_lib_location := c.String("cglf") action := c.String("action") if action == "debug" { //debug_read(c.String("cgf")) cgf.DebugRead(c.String("cgf")) return } else if action == "headercheck" { //header_bytes := cgf_default_header_bytes() header_bytes := cgf.CGFDefaultHeaderBytes() //hdri,dn := headerintermediate_from_bytes(header_bytes) ; _ = dn hdri, dn := cgf.HeaderIntermediateFromBytes(header_bytes) _ = dn //hdri_bytes := bytes_from_headerintermediate(hdri) hdri_bytes := cgf.BytesFromHeaderIntermediate(hdri) //hdri1,dn2 := headerintermediate_from_bytes(hdri_bytes) ; _ = dn2 hdri1, dn2 := cgf.HeaderIntermediateFromBytes(hdri_bytes) _ = dn2 //err := headerintermediate_cmp(hdri, hdri1) err := cgf.HeaderIntermediateCmp(hdri, hdri1) if err != nil { log.Fatal(err) } return } else if action == "header" { ocgf := c.String("output") //header_bytes := cgf_default_header_bytes() header_bytes := cgf.CGFDefaultHeaderBytes() f, err := os.Create(ocgf) if err != nil { log.Fatal(err) } f.Write(header_bytes) f.Sync() f.Close() return } else if action == "knot" { cglf_path := c.String("cglf") if len(cglf_path) == 0 { fmt.Fprintf(os.Stderr, "Provide CGLF\n") cli.ShowAppHelp(c) os.Exit(1) } cgf_bytes, e := ioutil.ReadFile(c.String("cgf")) if e != nil { log.Fatal(e) } //hdri,dn := headerintermediate_from_bytes(cgf_bytes[:]) hdri, dn := cgf.HeaderIntermediateFromBytes(cgf_bytes[:]) _ = hdri _ = dn //path,ver,step,e := parse_tilepos(c.String("tilepos")) path, ver, step, e := cgf.ParseTilepos(c.String("tilepos")) if e != nil { log.Fatal(e) } if path < 0 { log.Fatal("path must be positive") } if step < 0 { log.Fatal("step must be positive") } //if path >= len(hdri.step_per_path) { log.Fatal("path out of range (max ", len(hdri.step_per_path), " paths)") } //if step>= hdri.step_per_path[path] { log.Fatal("step out of range (max ", hdri.step_per_path[path], " steps)") } if path >= len(hdri.StepPerPath) { log.Fatal("path out of range (max ", len(hdri.StepPerPath), " paths)") } if step >= hdri.StepPerPath[path] { log.Fatal("step out of range (max ", hdri.StepPerPath[path], " steps)") } //pathi,_ := pathintermediate_from_bytes(hdri.path_bytes[path]) pathi, _ := cgf.PathIntermediateFromBytes(hdri.PathBytes[path]) knot := cgf.GetKnot(hdri.TileMap, pathi, step) if knot == nil { fmt.Printf("spanning tile?\n") } else { for i := 0; i < len(knot); i++ { phase_str := "A" if i == 1 { phase_str = "B" } for j := 0; j < len(knot[i]); j++ { fmt.Printf("%s %04x.%02x.%04x.%03x+%x", phase_str, path, ver, knot[i][j].Step, knot[i][j].VarId, knot[i][j].Span) seq := cgf.CGLFGetLibSeq(uint64(path), uint64(knot[i][j].Step), uint64(knot[i][j].VarId), uint64(knot[i][j].Span), cglf_path) if len(knot[i][j].NocallStartLen) > 0 { fmt.Printf("*{") for p := 0; p < len(knot[i][j].NocallStartLen); p += 2 { if p > 0 { fmt.Printf(";") } fmt.Printf("%d+%d", knot[i][j].NocallStartLen[p], knot[i][j].NocallStartLen[p+1]) } fmt.Printf("}") //noc_seq := fill_noc_seq(seq, knot[i][j].NocallStartLen) noc_seq := cgf.FillNocSeq(seq, knot[i][j].NocallStartLen) noc_m5str := cgf.Md5sum2str(md5.Sum([]byte(noc_seq))) fmt.Printf(" %s\n%s\n", noc_m5str, noc_seq) } else { m5str := cgf.Md5sum2str(md5.Sum([]byte(seq))) fmt.Printf(" %s\n%s\n", m5str, seq) } } } } return } else if action == "fastj" { tilepos_str := c.String("tilepos") if len(tilepos_str) == 0 { log.Fatal("missing tilepos") } if use_SGLF { _sglf, e := cglf.LoadGenomeLibraryCSV(c.String("sglf")) if e != nil { log.Fatal(e) } for i := 0; i < len(inp_slice); i++ { //e = print_tile_sglf(inp_slice[i], tilepos_str, sglf) e = cgf.PrintTileSGLF(inp_slice[i], tilepos_str, _sglf) if e != nil { log.Fatal(e) } } } else { if len(c.String("cgf")) != 0 { inp_slice = append(inp_slice, c.String("cgf")) } for i := 0; i < len(inp_slice); i++ { //e := print_tile_cglf(inp_slice[i], tilepos_str, cglf_lib_location) e := cgf.PrintTileCGLF(inp_slice[i], tilepos_str, cglf_lib_location) if e != nil { log.Fatal(e) } } } return } else if action == "fastj-range" { tilepos_str := c.String("tilepos") pos_parts := strings.Split(tilepos_str, ".") if (len(pos_parts) != 2) && (len(pos_parts) != 3) { fmt.Fprintf(os.Stderr, "Invalid tilepos\n") cli.ShowAppHelp(c) os.Exit(1) } path_range, e := parseIntOption(pos_parts[0], 16) if e != nil { fmt.Fprintf(os.Stderr, "Invalid path in tilepos: %v\n", e) cli.ShowAppHelp(c) os.Exit(1) } pp := 1 if len(pos_parts) == 3 { pp = 2 } step_range, e := parseIntOption(pos_parts[pp], 16) if e != nil { fmt.Fprintf(os.Stderr, "Invalid step in tilepos: %v\n", e) cli.ShowAppHelp(c) os.Exit(1) } if len(tilepos_str) == 0 { log.Fatal("missing tilepos") } if len(c.String("sglf")) > 0 { use_SGLF = true } if use_SGLF { _sglf, e := cglf.LoadGenomeLibraryCSV(c.String("sglf")) _ = _sglf if e != nil { log.Fatal(e) } if len(c.String("cgf")) != 0 { inp_slice = append(inp_slice, c.String("cgf")) } for i := 0; i < len(inp_slice); i++ { cgf_bytes, e := ioutil.ReadFile(inp_slice[i]) if e != nil { log.Fatal(e) } path := path_range[0][0] //hdri,_ := headerintermediate_from_bytes(cgf_bytes) ; _ = hdri //pathi,_ := pathintermediate_from_bytes(hdri.PathBytes[path]) ; _ = pathi hdri, _ := cgf.HeaderIntermediateFromBytes(cgf_bytes) _ = hdri pathi, _ := cgf.PathIntermediateFromBytes(hdri.PathBytes[path]) _ = pathi //hdri,dn := headerintermediate_from_bytes(cgf_bytes) hdri, dn := cgf.HeaderIntermediateFromBytes(cgf_bytes) if dn < 0 { log.Fatal("could not construct header from bytes") } //patho,dn := pathintermediate_from_bytes(hdri.PathBytes[path]) patho, dn := cgf.PathIntermediateFromBytes(hdri.PathBytes[path]) if dn < 0 { log.Fatal("could not construct path") } tilemap_bytes, _ := cgf.CGFTilemapBytes(cgf_bytes) //tilemap := unpack_tilemap(tilemap_bytes) tilemap := cgf.UnpackTileMap(tilemap_bytes) for step_idx := 0; step_idx < len(step_range); step_idx++ { if step_range[step_idx][1] == -1 { //step_range[step_idx][1] = int64(hdri.step_per_path[path]) step_range[step_idx][1] = int64(hdri.StepPerPath[path]) } } for stepr_idx := 0; stepr_idx < len(step_range); stepr_idx++ { for step := step_range[stepr_idx][0]; step < step_range[stepr_idx][1]; step++ { //knot := GetKnot(tilemap, patho, int(step)) //print_knot_fastj_sglf(knot, _sglf, uint64(path), 0, hdri) knot := cgf.GetKnot(tilemap, patho, int(step)) cgf.PrintKnotFastjSGLF(knot, _sglf, uint64(path), 0, hdri) } } } return } else { if len(c.String("cgf")) != 0 { inp_slice = append(inp_slice, c.String("cgf")) } for i := 0; i < len(inp_slice); i++ { cgf_bytes, e := ioutil.ReadFile(inp_slice[i]) if e != nil { log.Fatal(e) } path := path_range[0][0] _sglf := cglf.SGLF{} //populate_sglf_from_cglf(c.String("cglf"), &_sglf, uint64(path)) cgf.PopulateSGLFFromCGLF(c.String("cglf"), &_sglf, uint64(path)) os.Exit(0) //hdri,_ := headerintermediate_from_bytes(cgf_bytes) ; _ = hdri //pathi,_ := pathintermediate_from_bytes(hdri.PathBytes[path]) ; _ = pathi hdri, _ := cgf.HeaderIntermediateFromBytes(cgf_bytes) _ = hdri pathi, _ := cgf.PathIntermediateFromBytes(hdri.PathBytes[path]) _ = pathi //hdri,dn := headerintermediate_from_bytes(cgf_bytes) hdri, dn := cgf.HeaderIntermediateFromBytes(cgf_bytes) if dn < 0 { log.Fatal("could not construct header from bytes") } //patho,dn := pathintermediate_from_bytes(hdri.PathBytes[path]) patho, dn := cgf.PathIntermediateFromBytes(hdri.PathBytes[path]) if dn < 0 { log.Fatal("could not construct path") } tilemap_bytes, _ := cgf.CGFTilemapBytes(cgf_bytes) //tilemap := unpack_tilemap(tilemap_bytes) tilemap := cgf.UnpackTileMap(tilemap_bytes) for step_idx := 0; step_idx < len(step_range); step_idx++ { if step_range[step_idx][1] == -1 { //step_range[step_idx][1] = int64(hdri.step_per_path[path]) step_range[step_idx][1] = int64(hdri.StepPerPath[path]) } } for stepr_idx := 0; stepr_idx < len(step_range); stepr_idx++ { for step := step_range[stepr_idx][0]; step < step_range[stepr_idx][1]; step++ { //knot := GetKnot(tilemap, patho, int(step)) //print_knot_fastj_sglf(knot, _sglf, uint64(path), 0, hdri) knot := cgf.GetKnot(tilemap, patho, int(step)) cgf.PrintKnotFastjSGLF(knot, _sglf, uint64(path), 0, hdri) } } } } return } else if action == "knot-z" { cgf_bytes, e := ioutil.ReadFile(c.String("cgf")) _ = cgf_bytes if e != nil { log.Fatal(e) } path, ver, step, e := cgf.ParseTilepos(c.String("tilepos")) _ = path _ = ver _ = step if e != nil { log.Fatal(e) } if path < 0 { log.Fatal("path must be positive") } if step < 0 { log.Fatal("step must be positive") } fmt.Printf("not implemented\n") return } else if action == "knot-2" { cgf_bytes, e := ioutil.ReadFile(c.String("cgf")) if e != nil { log.Fatal(e) } //hdri,dn := headerintermediate_from_bytes(cgf_bytes[:]) hdri, dn := cgf.HeaderIntermediateFromBytes(cgf_bytes[:]) _ = hdri _ = dn //path,ver,step,e := parse_tilepos(c.String("tilepos")) path, ver, step, e := cgf.ParseTilepos(c.String("tilepos")) if e != nil { log.Fatal(e) } if path < 0 { log.Fatal("path must be positive") } if step < 0 { log.Fatal("step must be positive") } //if path >= len(hdri.step_per_path) { log.Fatal("path out of range (max ", len(hdri.step_per_path), " paths)") } //if step>= hdri.step_per_path[path] { log.Fatal("step out of range (max ", hdri.step_per_path[path], " steps)") } if path >= len(hdri.StepPerPath) { log.Fatal("path out of range (max ", len(hdri.StepPerPath), " paths)") } if step >= hdri.StepPerPath[path] { log.Fatal("step out of range (max ", hdri.StepPerPath[path], " steps)") } //pathi,_ := pathintermediate_from_bytes(hdri.PathBytes[path]) pathi, _ := cgf.PathIntermediateFromBytes(hdri.PathBytes[path]) //knot := GetKnot(hdri.tilemap, pathi, step) knot := cgf.GetKnot(hdri.TileMap, pathi, step) if knot == nil { fmt.Printf("spanning tile?") } else { for i := 0; i < len(knot); i++ { for j := 0; j < len(knot[i]); j++ { if j > 0 { fmt.Printf(" ") } fmt.Printf("%04x.%02x.%04x.%03x+%x", path, ver, knot[i][j].Step, knot[i][j].VarId, knot[i][j].Span) if gShowKnotNocallInfoFlag { if len(knot[i][j].NocallStartLen) > 0 { fmt.Printf("*{") for p := 0; p < len(knot[i][j].NocallStartLen); p += 2 { if p > 0 { fmt.Printf(";") } fmt.Printf("%d+%d", knot[i][j].NocallStartLen[p], knot[i][j].NocallStartLen[p+1]) } fmt.Printf("}") } } } fmt.Printf("\n") } } return } else if action == "sglfbarf" { //_sglf,e := LoadGenomeLibraryCSV(c.String("sglf")) _sglf, e := cglf.LoadGenomeLibraryCSV(c.String("sglf")) if e != nil { log.Fatal(e) } for path := range _sglf.LibInfo { for step := range _sglf.LibInfo[path] { for i := 0; i < len(_sglf.LibInfo[path][step]); i++ { fmt.Printf("%x,%x,%x.%x.%x+%x\n", path, step, _sglf.LibInfo[path][step][i].Path, _sglf.LibInfo[path][step][i].Step, _sglf.LibInfo[path][step][i].Variant, _sglf.LibInfo[path][step][i].Span) } } } return } else if action == "append" { _sglf, e := cglf.LoadGenomeLibraryCSV(c.String("sglf")) if e != nil { log.Fatal(e) } ain_slice := make([]autoio.AutoioHandle, 0, 8) for i := 0; i < len(inp_slice); i++ { inp_fn := inp_slice[i] ain, err := autoio.OpenReadScanner(inp_fn) _ = ain if err != nil { fmt.Fprintf(os.Stderr, "%v", err) os.Exit(1) } defer ain.Close() ain_slice = append(ain_slice, ain) break } path_str := c.String("path") path_u64, e := strconv.ParseInt(path_str, 16, 64) if e != nil { log.Fatal(e) } path := int(path_u64) cgf_bytes, e := ioutil.ReadFile(c.String("cgf")) if e != nil { log.Fatal(e) } //hdri,_ := headerintermediate_from_bytes(cgf_bytes[:]) hdri, _ := cgf.HeaderIntermediateFromBytes(cgf_bytes[:]) ctx := cgf.CGFContext{} _cgf := cgf.CGF{} _cgf.PathBytes = make([][]byte, 0, 1024) cgf.CGFFillHeader(&_cgf, cgf_bytes) ctx.CGF = &_cgf ctx.SGLF = &_sglf //CGFContext_construct_tilemap_lookup(&ctx) ctx.ConstructTileMapLookup() //allele_path,e := load_sample_fastj(&ain_slice[0]) allele_path, e := cgf.LoadSampleFastj(&ain_slice[0]) if e != nil { log.Fatal(e) } //PathBytes,e := emit_path_bytes(&ctx, path, allele_path) PathBytes, e := ctx.EmitPathBytes(path, allele_path) if e != nil { log.Fatal(e) } //headerintermediate_add_path(&hdri, path, PathBytes) //write_cgf_from_intermediate(c.String("output"), &hdri) cgf.HeaderIntermediateAddPath(&hdri, path, PathBytes) cgf.WriteCGFFromIntermediate(c.String("output"), &hdri) return } ain_slice := make([]autoio.AutoioHandle, 0, 8) for i := 0; i < len(inp_slice); i++ { inp_fn := inp_slice[i] ain, err := autoio.OpenReadScanner(inp_fn) _ = ain if err != nil { fmt.Fprintf(os.Stderr, "%v", err) os.Exit(1) } defer ain.Close() ain_slice = append(ain_slice, ain) } aout, err := autoio.CreateWriter(c.String("output")) _ = aout if err != nil { fmt.Fprintf(os.Stderr, "%v", err) os.Exit(1) } defer func() { aout.Flush(); aout.Close() }() if c.Bool("pprof") { gProfileFlag = true gProfileFile = c.String("pprof-file") } if c.Bool("mprof") { gMemProfileFlag = true gMemProfileFile = c.String("mprof-file") } gVerboseFlag = c.Bool("Verbose") if c.Int("max-procs") > 0 { runtime.GOMAXPROCS(c.Int("max-procs")) } if gProfileFlag { prof_f, err := os.Create(gProfileFile) if err != nil { fmt.Fprintf(os.Stderr, "Could not open profile file %s: %v\n", gProfileFile, err) os.Exit(2) } pprof.StartCPUProfile(prof_f) defer pprof.StopCPUProfile() } _sglf, e := cglf.LoadGenomeLibraryCSV(c.String("sglf")) if e != nil { log.Fatal(e) } ctx := cgf.CGFContext{} _cgf := cgf.CGF{} //header_bytes := cgf_default_header_bytes() header_bytes := cgf.CGFDefaultHeaderBytes() cgf.CGFFillHeader(&_cgf, header_bytes) ctx.CGF = &_cgf ctx.SGLF = &_sglf //CGFContext_construct_tilemap_lookup(&ctx) ctx.ConstructTileMapLookup() for i := 0; i < len(ain_slice); i++ { ain := ain_slice[i] //allele_path,e := load_sample_fastj(&ain) allele_path, e := cgf.LoadSampleFastj(&ain) if e != nil { log.Fatal(e) } p := 0x2c5 if i > 0 { p = 0x247 } //e = update_vector_path_simple(&ctx, p, allele_path) e = ctx.UpdateVectorPathSimple(p, allele_path) if len(ctx.CGF.StepPerPath) < len(ain_slice) { ctx.CGF.StepPerPath = append(ctx.CGF.StepPerPath, uint64(len(_sglf.Lib[p]))) } } ctx.CGF.PathCount = uint64(len(_cgf.Path)) ctx.CGF.StepPerPath = make([]uint64, ctx.CGF.PathCount) for i := uint64(0); i < ctx.CGF.PathCount; i++ { ctx.CGF.StepPerPath[i] = uint64(len(_sglf.Lib[int(i)])) } //write_cgf(&ctx, "out.cgf") ctx.WriteCGF("out.cgf") }
func load_Assembly(ctx *LanternContext, tagset_pdh, assembly_pdh string) error { assembly_fn := ctx.Config.O["tagset"].O[tagset_pdh].O["assembly"].O[assembly_pdh].O["gz"].S fp, e := autoio.OpenReadScanner(assembly_fn) if e != nil { return e } defer fp.Close() log.Printf(">>>> loading assembly: %s\n", assembly_pdh) if ctx.Assembly == nil { ctx.Assembly = make(map[string]map[int][]int) } ctx.Assembly[assembly_pdh] = make(map[int][]int) if ctx.AssemblyChrom == nil { ctx.AssemblyChrom = make(map[string]map[int]string) } ctx.AssemblyChrom[assembly_pdh] = make(map[int]string) path := 0 for fp.ReadScan() { l := fp.ReadText() if len(l) == 0 { continue } if l[0] == '\n' { continue } if l[0] == '>' { parts := strings.Split(l[1:], ":") name := parts[0] _ = name chrom := parts[1] _ = chrom path_s := parts[2] _path, e := strconv.ParseInt(path_s, 16, 64) if e != nil { return e } path = int(_path) ctx.Assembly[assembly_pdh][path] = make([]int, 0, 1024) ctx.AssemblyChrom[assembly_pdh][path] = chrom continue } _step, e := strconv.ParseInt(l[0:4], 16, 64) if e != nil { return e } step := int(_step) _ = step z := _skip_space(l[5:]) _ref_pos, e := strconv.ParseInt(z, 10, 64) if e != nil { return e } ref_pos := int(_ref_pos) ctx.Assembly[assembly_pdh][path] = append(ctx.Assembly[assembly_pdh][path], ref_pos) } return nil }
func (sglf *SGLF) AddGenomeLibraryCSV(fn string) error { if sglf.Lib == nil { sglf.Lib = make(map[int]map[int][]string) } if sglf.LibInfo == nil { sglf.LibInfo = make(map[int]map[int][]SGLFInfo) } ain, e := autoio.OpenReadScanner(fn) //if e!=nil { return sglf, e } if e != nil { return e } defer ain.Close() line_no := -1 if sglf.MD5Lookup == nil { sglf.MD5Lookup = make(map[string]SGLFInfo) } if sglf.PfxTagLookup == nil { sglf.PfxTagLookup = make(map[string]SGLFInfo) } if sglf.SfxTagLookup == nil { sglf.SfxTagLookup = make(map[string]SGLFInfo) } prev_pfxtag := "" prev_sfxtag := "" prev_sglf_info := SGLFInfo{} prev_tilepath := -1 // There's a corner case when we're at the last tile and // we add the sfx tag to SfxTagLookup. If one's already // added, we can consult the 'can_overwrite' and notice that // we shouldn't overwrite it. If we've added the sfxtag // to the SfxTagLookup but then later notice it's the last // tile, we can set the 'can_overwrite' entry to false // to allow the addition of a future sfxtag. // // Tags are unique so it might never come up but if // some variation at the end induces a run to be like a tag // we could run into problems (though we might have other // problems as well) // can_overwrite := make(map[string]bool) for ain.ReadScan() { line_no++ l := ain.ReadText() if len(l) == 0 { continue } if (l[0] == 0) || (l[0] == '#') { continue } line_parts := strings.Split(l, ",") //if len(line_parts)<3 { return sglf, fmt.Errorf("not enough CSV elements on line_no %d", line_no) } if len(line_parts) < 3 { return fmt.Errorf("not enough CSV elements on line_no %d", line_no) } tileid_span_parts := strings.Split(line_parts[0], "+") //if len(tileid_span_parts)!=2 { return sglf, fmt.Errorf("invalid tileid (%s) on line_no %d", line_parts[0], line_no) } if len(tileid_span_parts) != 2 { return fmt.Errorf("invalid tileid (%s) on line_no %d", line_parts[0], line_no) } tileid_parts := strings.Split(tileid_span_parts[0], ".") //if len(tileid_parts)!=4 { return sglf, fmt.Errorf("invalid tileid (%s) on line_no %d", line_parts[0], line_no) } if len(tileid_parts) != 4 { return fmt.Errorf("invalid tileid (%s) on line_no %d", line_parts[0], line_no) } tilepath_l, e := strconv.ParseInt(tileid_parts[0], 16, 64) //if e!=nil { return sglf, fmt.Errorf("%v: line_no %d\n", e, line_no) } if e != nil { return fmt.Errorf("%v: line_no %d\n", e, line_no) } tilestep_l, e := strconv.ParseInt(tileid_parts[2], 16, 64) //if e!=nil { return sglf, fmt.Errorf("%v: line_no %d\n", e, line_no) } if e != nil { return fmt.Errorf("%v: line_no %d\n", e, line_no) } tilevar_l, e := strconv.ParseInt(tileid_parts[3], 16, 64) //if e!=nil { return sglf, fmt.Errorf("%v: line_no %d\n", e, line_no) } if e != nil { return fmt.Errorf("%v: line_no %d\n", e, line_no) } //tilespan_l,e := strconv.ParseInt(tileid_span_parts[1], 16, 64) tilespan_l, e := strconv.ParseInt(tileid_span_parts[1], 10, 64) //if e!=nil { return sglf, fmt.Errorf("%v: line_no %d\n", e, line_no) } if e != nil { return fmt.Errorf("%v: line_no %d\n", e, line_no) } tilepath := int(tilepath_l) tilestep := int(tilestep_l) tilevar := int(tilevar_l) tilespan := int(tilespan_l) md5_str := line_parts[1] seq := line_parts[2] //if len(seq) < 48 { return sglf, fmt.Errorf("len(seq)<48: line_no %d", line_no) } if len(seq) < 48 { return fmt.Errorf("len(seq)<48: line_no %d", line_no) } pfxtag := seq[:24] sfxtag := seq[len(seq)-24:] if _, ok := sglf.Lib[tilepath]; !ok { sglf.Lib[tilepath] = make(map[int][]string) sglf.LibInfo[tilepath] = make(map[int][]SGLFInfo) } if _, ok := sglf.Lib[tilepath][tilestep]; !ok { sglf.Lib[tilepath][tilestep] = make([]string, 0, 16) sglf.LibInfo[tilepath][tilestep] = make([]SGLFInfo, 0, 16) } sglf.Lib[tilepath][tilestep] = append(sglf.Lib[tilepath][tilestep], seq) sglf.LibInfo[tilepath][tilestep] = append(sglf.LibInfo[tilepath][tilestep], SGLFInfo{Path: tilepath, Step: tilestep, Variant: tilevar, Span: tilespan}) sglf_info := SGLFInfo{Path: int(tilepath), Step: int(tilestep), Variant: int(tilevar), Span: int(tilespan)} sglf.MD5Lookup[md5_str] = sglf_info if prev_pfxtag != "" { sglf.PfxTagLookup[prev_pfxtag] = prev_sglf_info } if prev_sfxtag != "" { if _, ok := can_overwrite[prev_sfxtag]; !ok { //DEBUG //fmt.Printf(">>> adding prev_sfxtag %s\n", prev_sfxtag) // not in map, add it // sglf.SfxTagLookup[prev_sfxtag] = prev_sglf_info } else if can_overwrite[prev_sfxtag] { log.Printf("found suspicious tag (previously seen): '%s' at path.step.variant (%x.%x.%x)", prev_sfxtag, tilepath, tilestep, tilevar) // otherwise if we can overwrite it, do so // sglf.SfxTagLookup[prev_sfxtag] = prev_sglf_info } // Remember we've added this sfxtag // can_overwrite[prev_sfxtag] = false } if prev_tilepath != tilepath { if tilestep > 0 { can_overwrite[prev_sfxtag] = true } prev_pfxtag = "" prev_sfxtag = "" //fmt.Printf("######\n") } prev_sfxtag = sfxtag if tilestep > 0 { prev_pfxtag = pfxtag } prev_sglf_info = sglf_info prev_tilepath = tilepath //fmt.Printf("%d: %x.%x %s (%s) %s (%s)\n", line_no, tilepath, tilestep, prev_pfxtag, pfxtag, prev_sfxtag, sfxtag) //fmt.Printf(":%s:\n", l) //os.Exit(0) } if prev_pfxtag != "" { sglf.PfxTagLookup[prev_pfxtag] = prev_sglf_info } //return sglf, nil return nil }
func _main_gff_to_rotini(c *cli.Context) { var e error infn_slice := c.StringSlice("input") if len(infn_slice) < 1 { infn_slice = append(infn_slice, "-") } ain, err := autoio.OpenReadScanner(infn_slice[0]) if err != nil { fmt.Fprintf(os.Stderr, "%v", err) os.Stderr.Sync() os.Exit(1) } defer ain.Close() fp := os.Stdin if c.String("refstream") != "-" { fp, e = os.Open(c.String("refstream")) if e != nil { fmt.Fprintf(os.Stderr, "%v", e) os.Stderr.Sync() os.Exit(1) } defer fp.Close() } ref_stream := bufio.NewReader(fp) out := bufio.NewWriter(os.Stdout) gff := GFFRefVar{} gff.Init() if len(c.String("chrom")) > 0 { gff.Chrom(c.String("chrom")) } if c.Int("start") > 0 { gff.RefPos = c.Int("start") gff.PrevRefPos = gff.RefPos } line_no := 0 gff.PastaBegin(out) for ain.ReadScan() { gff_line := ain.ReadText() line_no++ if len(gff_line) == 0 || gff_line == "" { continue } e := gff.Pasta(gff_line, ref_stream, out) //if e == io.EOF { break } if (e != io.EOF) && (e != nil) { fmt.Fprintf(os.Stderr, "ERROR: %v at line %v\n", e, line_no) return } } e = gff.PastaRefEnd(ref_stream, out) if (e != io.EOF) && (e != nil) { fmt.Fprintf(os.Stderr, "ERROR: GFF PastaRefEnd: %v at line %v\n", e, line_no) return } gff.PastaEnd(out) }