Ejemplo n.º 1
0
// _main_fasta_to_pasta is the CLI handler that converts FASTA text into PASTA
// written to stdout.
//
// The FASTA lines are read from the first "input" option ("-" is used when no
// input is given) and the reference sequence from the "refstream" option,
// where "-" selects stdin. Every non-empty input line is passed to
// FASTAInfo.Pasta together with the reference reader and the buffered stdout
// writer.
//
// Failing to open either stream prints the error to stderr and exits with
// status 1. A conversion error is reported on stderr with its 1-based line
// number and the function returns without calling PastaEnd.
func _main_fasta_to_pasta(c *cli.Context) {
	infn_slice := c.StringSlice("input")
	if len(infn_slice) < 1 {
		infn_slice = append(infn_slice, "-")
	}

	ain, err := autoio.OpenReadScanner(infn_slice[0])
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v", err)
		os.Stderr.Sync()
		os.Exit(1)
	}
	defer ain.Close()

	fp := os.Stdin
	if ref_fn := c.String("refstream"); ref_fn != "-" {
		f, err := os.Open(ref_fn)
		if err != nil {
			fmt.Fprintf(os.Stderr, "%v", err)
			os.Stderr.Sync()
			os.Exit(1)
		}
		defer f.Close()
		fp = f
	}
	ref_stream := bufio.NewReader(fp)

	out := bufio.NewWriter(os.Stdout)

	fi := FASTAInfo{}
	fi.Init()
	fi.Allele = 0

	line_no := 0
	fi.PastaBegin(out)
	for ain.ReadScan() {
		fasta_line := ain.ReadText()
		line_no++

		if fasta_line == "" {
			continue
		}
		if err := fi.Pasta(fasta_line, ref_stream, out); err != nil {
			fmt.Fprintf(os.Stderr, "ERROR: %v at line %v\n", err, line_no)
			return
		}
	}
	fi.PastaEnd(out)

	// Push whatever is still sitting in the buffer to stdout, as the gVCF
	// handler does. This is a no-op if PastaEnd already flushed the writer.
	if err := out.Flush(); err != nil {
		fmt.Fprintf(os.Stderr, "ERROR: %v\n", err)
	}
}
Ejemplo n.º 2
0
// _main_gvcf_to_rotini is the CLI handler that turns gVCF text into PASTA
// output on stdout.
//
// Input comes from the first "input" option (falling back to "-"), the
// reference sequence from "refstream" ("-" means stdin). Empty lines are
// skipped; every other line goes through GVCFRefVar.Pasta. A conversion error
// is printed with its 1-based line number and ends the run early; otherwise
// the trailer is written and the writer is flushed.
func _main_gvcf_to_rotini(c *cli.Context) {
	inputs := c.StringSlice("input")
	if len(inputs) == 0 {
		inputs = append(inputs, "-")
	}

	scanner, scanErr := autoio.OpenReadScanner(inputs[0])
	if scanErr != nil {
		fmt.Fprintf(os.Stderr, "%v", scanErr)
		os.Stderr.Sync()
		os.Exit(1)
	}
	defer scanner.Close()

	// The reference defaults to stdin unless a file name was supplied.
	refFile := os.Stdin
	if refName := c.String("refstream"); refName != "-" {
		opened, openErr := os.Open(refName)
		if openErr != nil {
			fmt.Fprintf(os.Stderr, "%v", openErr)
			os.Stderr.Sync()
			os.Exit(1)
		}
		refFile = opened
		defer refFile.Close()
	}
	refReader := bufio.NewReader(refFile)
	writer := bufio.NewWriter(os.Stdout)

	conv := gvcf.GVCFRefVar{}
	conv.Init()
	conv.PastaBegin(writer)

	// lineCount tracks the 1-based number of the line being processed; the
	// post statement also runs after a `continue`, so skipped lines count.
	for lineCount := 1; scanner.ReadScan(); lineCount++ {
		text := scanner.ReadText()
		if text == "" {
			continue
		}

		if convErr := conv.Pasta(text, refReader, writer); convErr != nil {
			fmt.Fprintf(os.Stderr, "ERROR: %v at line %v\n", convErr, lineCount)
			return
		}
	}

	conv.PastaEnd(writer)
	writer.Flush()
}
Ejemplo n.º 3
0
// _main_diff_to_rotini is the CLI handler for the diff conversion: it opens
// the first "input" option (falling back to "-") and hands the scanner to
// diff_to_interleave. An open failure is printed to stderr and the process
// exits with status 1.
func _main_diff_to_rotini(c *cli.Context) {
	inputs := c.StringSlice("input")
	if len(inputs) == 0 {
		inputs = append(inputs, "-")
	}

	src, openErr := autoio.OpenReadScanner(inputs[0])
	if openErr != nil {
		fmt.Fprintf(os.Stderr, "%v", openErr)
		os.Stderr.Sync()
		os.Exit(1)
	}
	defer src.Close()

	diff_to_interleave(&src)
}
Ejemplo n.º 4
0
// _main is the default CLI action. It dispatches on the "action" option:
//
//   - debug: dump the CGF file named by "cgf" through cgf.DebugRead.
//   - headercheck: round-trip the default CGF header through its intermediate
//     form and compare the two results.
//   - header: write the default CGF header bytes to the "output" file.
//   - knot: print the knot at "tilepos" with its library sequence and MD5 sum.
//   - fastj: print the tile at "tilepos" for every input.
//   - fastj-range: print FastJ for a range of steps of one path.
//   - knot-z: validate the options and print "not implemented".
//   - knot-2: print the knot at "tilepos", one allele per line.
//   - sglfbarf: dump the LibInfo table of the SGLF library.
//   - append: encode the first input as a path and add it to an existing CGF.
//
// Any other action falls through to the default flow: every input is opened,
// a CGF is built from the default header, each input is loaded with
// cgf.LoadSampleFastj and stored with UpdateVectorPathSimple, and the result
// is written to "out.cgf".
//
// Most failures terminate the process through log.Fatal or os.Exit; note that
// both skip deferred calls.
func _main(c *cli.Context) {
	gShowKnotNocallInfoFlag = !c.Bool("hide-knot-low-quality")

	inp_slice := c.StringSlice("input")

	cglf_lib_location := c.String("cglf")

	switch c.String("action") {
	case "debug":
		cgf.DebugRead(c.String("cgf"))
		return

	case "headercheck":
		header_bytes := cgf.CGFDefaultHeaderBytes()

		// bytes -> intermediate -> bytes -> intermediate, then compare both
		// intermediate forms.
		hdri, _ := cgf.HeaderIntermediateFromBytes(header_bytes)
		hdri_bytes := cgf.BytesFromHeaderIntermediate(hdri)
		hdri1, _ := cgf.HeaderIntermediateFromBytes(hdri_bytes)

		if err := cgf.HeaderIntermediateCmp(hdri, hdri1); err != nil {
			log.Fatal(err)
		}
		return

	case "header":
		ocgf := c.String("output")

		header_bytes := cgf.CGFDefaultHeaderBytes()

		f, err := os.Create(ocgf)
		if err != nil {
			log.Fatal(err)
		}

		// A short or failed write would leave a truncated header behind, so
		// report it instead of silently succeeding.
		if _, err := f.Write(header_bytes); err != nil {
			f.Close()
			log.Fatal(err)
		}
		// Sync stays best-effort: its result was never checked and it can
		// fail for outputs that are not regular files.
		f.Sync()
		if err := f.Close(); err != nil {
			log.Fatal(err)
		}

		return

	case "knot":
		cglf_path := c.String("cglf")
		if len(cglf_path) == 0 {
			fmt.Fprintf(os.Stderr, "Provide CGLF\n")
			cli.ShowAppHelp(c)
			os.Exit(1)
		}

		cgf_bytes, e := ioutil.ReadFile(c.String("cgf"))
		if e != nil {
			log.Fatal(e)
		}

		hdri, _ := cgf.HeaderIntermediateFromBytes(cgf_bytes[:])

		path, ver, step, e := cgf.ParseTilepos(c.String("tilepos"))
		if e != nil {
			log.Fatal(e)
		}

		if path < 0 {
			log.Fatal("path must be positive")
		}
		if step < 0 {
			log.Fatal("step must be positive")
		}

		if path >= len(hdri.StepPerPath) {
			log.Fatal("path out of range (max ", len(hdri.StepPerPath), " paths)")
		}
		if step >= hdri.StepPerPath[path] {
			log.Fatal("step out of range (max ", hdri.StepPerPath[path], " steps)")
		}

		pathi, _ := cgf.PathIntermediateFromBytes(hdri.PathBytes[path])

		knot := cgf.GetKnot(hdri.TileMap, pathi, step)
		if knot == nil {
			fmt.Printf("spanning tile?\n")
			return
		}

		for i := 0; i < len(knot); i++ {
			// Index 1 is labelled "B"; every other index is labelled "A".
			phase_str := "A"
			if i == 1 {
				phase_str = "B"
			}

			for j := 0; j < len(knot[i]); j++ {
				tile := knot[i][j]

				fmt.Printf("%s %04x.%02x.%04x.%03x+%x",
					phase_str,
					path, ver,
					tile.Step,
					tile.VarId,
					tile.Span)

				seq := cgf.CGLFGetLibSeq(uint64(path),
					uint64(tile.Step),
					uint64(tile.VarId),
					uint64(tile.Span),
					cglf_path)

				if len(tile.NocallStartLen) > 0 {
					// NocallStartLen is read as consecutive (start, length) pairs.
					fmt.Printf("*{")
					for p := 0; p < len(tile.NocallStartLen); p += 2 {
						if p > 0 {
							fmt.Printf(";")
						}
						fmt.Printf("%d+%d",
							tile.NocallStartLen[p],
							tile.NocallStartLen[p+1])
					}
					fmt.Printf("}")

					noc_seq := cgf.FillNocSeq(seq, tile.NocallStartLen)
					noc_m5str := cgf.Md5sum2str(md5.Sum([]byte(noc_seq)))
					fmt.Printf(" %s\n%s\n", noc_m5str, noc_seq)
				} else {
					m5str := cgf.Md5sum2str(md5.Sum([]byte(seq)))
					fmt.Printf(" %s\n%s\n", m5str, seq)
				}
			}
		}

		return

	case "fastj":
		tilepos_str := c.String("tilepos")
		if len(tilepos_str) == 0 {
			log.Fatal("missing tilepos")
		}

		if use_SGLF {
			_sglf, e := cglf.LoadGenomeLibraryCSV(c.String("sglf"))
			if e != nil {
				log.Fatal(e)
			}

			for i := 0; i < len(inp_slice); i++ {
				e = cgf.PrintTileSGLF(inp_slice[i], tilepos_str, _sglf)
				if e != nil {
					log.Fatal(e)
				}
			}
		} else {
			if len(c.String("cgf")) != 0 {
				inp_slice = append(inp_slice, c.String("cgf"))
			}

			for i := 0; i < len(inp_slice); i++ {
				e := cgf.PrintTileCGLF(inp_slice[i], tilepos_str, cglf_lib_location)
				if e != nil {
					log.Fatal(e)
				}
			}
		}

		return

	case "fastj-range":
		tilepos_str := c.String("tilepos")

		// Checked before parsing: an empty string would otherwise be caught
		// by the generic "Invalid tilepos" test below and this more specific
		// message could never be reached.
		if len(tilepos_str) == 0 {
			log.Fatal("missing tilepos")
		}

		// Accepts either "path.step" or a three-part form whose last part is
		// the step.
		pos_parts := strings.Split(tilepos_str, ".")
		if (len(pos_parts) != 2) && (len(pos_parts) != 3) {
			fmt.Fprintf(os.Stderr, "Invalid tilepos\n")
			cli.ShowAppHelp(c)
			os.Exit(1)
		}

		path_range, e := parseIntOption(pos_parts[0], 16)
		if e != nil {
			fmt.Fprintf(os.Stderr, "Invalid path in tilepos: %v\n", e)
			cli.ShowAppHelp(c)
			os.Exit(1)
		}

		pp := 1
		if len(pos_parts) == 3 {
			pp = 2
		}

		step_range, e := parseIntOption(pos_parts[pp], 16)
		if e != nil {
			fmt.Fprintf(os.Stderr, "Invalid step in tilepos: %v\n", e)
			cli.ShowAppHelp(c)
			os.Exit(1)
		}

		if len(c.String("sglf")) > 0 {
			use_SGLF = true
		}

		if use_SGLF {
			_sglf, e := cglf.LoadGenomeLibraryCSV(c.String("sglf"))
			if e != nil {
				log.Fatal(e)
			}

			if len(c.String("cgf")) != 0 {
				inp_slice = append(inp_slice, c.String("cgf"))
			}

			for i := 0; i < len(inp_slice); i++ {
				cgf_bytes, e := ioutil.ReadFile(inp_slice[i])
				if e != nil {
					log.Fatal(e)
				}

				// Only the start of the first path range is used.
				path := path_range[0][0]

				// The header is parsed once and validated before PathBytes
				// is indexed.
				hdri, dn := cgf.HeaderIntermediateFromBytes(cgf_bytes)
				if dn < 0 {
					log.Fatal("could not construct header from bytes")
				}

				patho, dn := cgf.PathIntermediateFromBytes(hdri.PathBytes[path])
				if dn < 0 {
					log.Fatal("could not construct path")
				}

				tilemap_bytes, _ := cgf.CGFTilemapBytes(cgf_bytes)
				tilemap := cgf.UnpackTileMap(tilemap_bytes)

				// An end of -1 is replaced by the number of steps in the path.
				for step_idx := 0; step_idx < len(step_range); step_idx++ {
					if step_range[step_idx][1] == -1 {
						step_range[step_idx][1] = int64(hdri.StepPerPath[path])
					}
				}

				for stepr_idx := 0; stepr_idx < len(step_range); stepr_idx++ {
					for step := step_range[stepr_idx][0]; step < step_range[stepr_idx][1]; step++ {
						knot := cgf.GetKnot(tilemap, patho, int(step))
						cgf.PrintKnotFastjSGLF(knot, _sglf, uint64(path), 0, hdri)
					}
				}
			}
			return
		}

		if len(c.String("cgf")) != 0 {
			inp_slice = append(inp_slice, c.String("cgf"))
		}

		for i := 0; i < len(inp_slice); i++ {
			cgf_bytes, e := ioutil.ReadFile(inp_slice[i])
			if e != nil {
				log.Fatal(e)
			}

			path := path_range[0][0]

			_sglf := cglf.SGLF{}

			cgf.PopulateSGLFFromCGLF(c.String("cglf"), &_sglf, uint64(path))

			// NOTE(review): this unconditional exit makes the rest of the
			// loop body unreachable, so the CGLF-backed range output is never
			// produced. It is kept as found; confirm whether it is a
			// debugging leftover before removing it.
			os.Exit(0)

			hdri, dn := cgf.HeaderIntermediateFromBytes(cgf_bytes)
			if dn < 0 {
				log.Fatal("could not construct header from bytes")
			}

			patho, dn := cgf.PathIntermediateFromBytes(hdri.PathBytes[path])
			if dn < 0 {
				log.Fatal("could not construct path")
			}

			tilemap_bytes, _ := cgf.CGFTilemapBytes(cgf_bytes)
			tilemap := cgf.UnpackTileMap(tilemap_bytes)

			for step_idx := 0; step_idx < len(step_range); step_idx++ {
				if step_range[step_idx][1] == -1 {
					step_range[step_idx][1] = int64(hdri.StepPerPath[path])
				}
			}

			for stepr_idx := 0; stepr_idx < len(step_range); stepr_idx++ {
				for step := step_range[stepr_idx][0]; step < step_range[stepr_idx][1]; step++ {
					knot := cgf.GetKnot(tilemap, patho, int(step))
					cgf.PrintKnotFastjSGLF(knot, _sglf, uint64(path), 0, hdri)
				}
			}
		}

		return

	case "knot-z":
		// The file is only read to verify that it is accessible.
		_, e := ioutil.ReadFile(c.String("cgf"))
		if e != nil {
			log.Fatal(e)
		}

		path, _, step, e := cgf.ParseTilepos(c.String("tilepos"))
		if e != nil {
			log.Fatal(e)
		}

		if path < 0 {
			log.Fatal("path must be positive")
		}
		if step < 0 {
			log.Fatal("step must be positive")
		}

		fmt.Printf("not implemented\n")

		return

	case "knot-2":
		cgf_bytes, e := ioutil.ReadFile(c.String("cgf"))
		if e != nil {
			log.Fatal(e)
		}

		hdri, _ := cgf.HeaderIntermediateFromBytes(cgf_bytes[:])

		path, ver, step, e := cgf.ParseTilepos(c.String("tilepos"))
		if e != nil {
			log.Fatal(e)
		}

		if path < 0 {
			log.Fatal("path must be positive")
		}
		if step < 0 {
			log.Fatal("step must be positive")
		}

		if path >= len(hdri.StepPerPath) {
			log.Fatal("path out of range (max ", len(hdri.StepPerPath), " paths)")
		}
		if step >= hdri.StepPerPath[path] {
			log.Fatal("step out of range (max ", hdri.StepPerPath[path], " steps)")
		}

		pathi, _ := cgf.PathIntermediateFromBytes(hdri.PathBytes[path])

		knot := cgf.GetKnot(hdri.TileMap, pathi, step)
		if knot == nil {
			fmt.Printf("spanning tile?")
			return
		}

		// One output line per allele; tiles on a line are space separated.
		for i := 0; i < len(knot); i++ {
			for j := 0; j < len(knot[i]); j++ {
				tile := knot[i][j]

				if j > 0 {
					fmt.Printf(" ")
				}
				fmt.Printf("%04x.%02x.%04x.%03x+%x",
					path, ver,
					tile.Step,
					tile.VarId,
					tile.Span)

				if gShowKnotNocallInfoFlag && len(tile.NocallStartLen) > 0 {
					fmt.Printf("*{")
					for p := 0; p < len(tile.NocallStartLen); p += 2 {
						if p > 0 {
							fmt.Printf(";")
						}
						fmt.Printf("%d+%d",
							tile.NocallStartLen[p],
							tile.NocallStartLen[p+1])
					}
					fmt.Printf("}")
				}
			}
			fmt.Printf("\n")
		}

		return

	case "sglfbarf":
		_sglf, e := cglf.LoadGenomeLibraryCSV(c.String("sglf"))
		if e != nil {
			log.Fatal(e)
		}

		for path := range _sglf.LibInfo {
			for step := range _sglf.LibInfo[path] {
				for i := 0; i < len(_sglf.LibInfo[path][step]); i++ {
					fmt.Printf("%x,%x,%x.%x.%x+%x\n", path, step,
						_sglf.LibInfo[path][step][i].Path,
						_sglf.LibInfo[path][step][i].Step,
						_sglf.LibInfo[path][step][i].Variant,
						_sglf.LibInfo[path][step][i].Span)
				}
			}
		}

		return

	case "append":
		_sglf, e := cglf.LoadGenomeLibraryCSV(c.String("sglf"))
		if e != nil {
			log.Fatal(e)
		}

		// Only the first input is used. Without any input there is nothing
		// to append, so fail with a message instead of indexing an empty
		// slice further down.
		if len(inp_slice) == 0 {
			log.Fatal("no input provided")
		}
		ain, err := autoio.OpenReadScanner(inp_slice[0])
		if err != nil {
			fmt.Fprintf(os.Stderr, "%v", err)
			os.Exit(1)
		}
		defer ain.Close()

		// "path" is given in hexadecimal.
		path_str := c.String("path")
		path_u64, e := strconv.ParseInt(path_str, 16, 64)
		if e != nil {
			log.Fatal(e)
		}
		path := int(path_u64)

		cgf_bytes, e := ioutil.ReadFile(c.String("cgf"))
		if e != nil {
			log.Fatal(e)
		}

		hdri, _ := cgf.HeaderIntermediateFromBytes(cgf_bytes[:])

		ctx := cgf.CGFContext{}
		_cgf := cgf.CGF{}
		_cgf.PathBytes = make([][]byte, 0, 1024)
		cgf.CGFFillHeader(&_cgf, cgf_bytes)

		ctx.CGF = &_cgf
		ctx.SGLF = &_sglf
		ctx.ConstructTileMapLookup()

		allele_path, e := cgf.LoadSampleFastj(&ain)
		if e != nil {
			log.Fatal(e)
		}

		PathBytes, e := ctx.EmitPathBytes(path, allele_path)
		if e != nil {
			log.Fatal(e)
		}

		cgf.HeaderIntermediateAddPath(&hdri, path, PathBytes)
		cgf.WriteCGFFromIntermediate(c.String("output"), &hdri)

		return
	}

	// Default flow: build a CGF from every input.
	ain_slice := make([]autoio.AutoioHandle, 0, 8)
	for i := 0; i < len(inp_slice); i++ {
		inp_fn := inp_slice[i]
		ain, err := autoio.OpenReadScanner(inp_fn)
		if err != nil {
			fmt.Fprintf(os.Stderr, "%v", err)
			os.Exit(1)
		}
		// Deferred on purpose: the handles are read further down and must
		// stay open until the function returns.
		defer ain.Close()
		ain_slice = append(ain_slice, ain)
	}

	// NOTE(review): aout is created and closed but nothing in this function
	// writes to it; the result goes to the fixed file name "out.cgf" below.
	aout, err := autoio.CreateWriter(c.String("output"))
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v", err)
		os.Exit(1)
	}
	defer func() { aout.Flush(); aout.Close() }()

	if c.Bool("pprof") {
		gProfileFlag = true
		gProfileFile = c.String("pprof-file")
	}

	if c.Bool("mprof") {
		gMemProfileFlag = true
		gMemProfileFile = c.String("mprof-file")
	}

	gVerboseFlag = c.Bool("Verbose")

	if c.Int("max-procs") > 0 {
		runtime.GOMAXPROCS(c.Int("max-procs"))
	}

	if gProfileFlag {
		prof_f, err := os.Create(gProfileFile)
		if err != nil {
			fmt.Fprintf(os.Stderr, "Could not open profile file %s: %v\n", gProfileFile, err)
			os.Exit(2)
		}

		pprof.StartCPUProfile(prof_f)
		defer pprof.StopCPUProfile()
	}

	_sglf, e := cglf.LoadGenomeLibraryCSV(c.String("sglf"))
	if e != nil {
		log.Fatal(e)
	}

	ctx := cgf.CGFContext{}
	_cgf := cgf.CGF{}

	header_bytes := cgf.CGFDefaultHeaderBytes()
	cgf.CGFFillHeader(&_cgf, header_bytes)

	ctx.CGF = &_cgf
	ctx.SGLF = &_sglf
	ctx.ConstructTileMapLookup()

	for i := 0; i < len(ain_slice); i++ {
		ain := ain_slice[i]

		allele_path, e := cgf.LoadSampleFastj(&ain)
		if e != nil {
			log.Fatal(e)
		}

		// NOTE(review): the path numbers are hard-coded. The first input is
		// stored under 0x2c5 and every later input under 0x247.
		p := 0x2c5
		if i > 0 {
			p = 0x247
		}

		// The result used to be assigned and then dropped; a failed update
		// must not be followed by writing the file.
		if e = ctx.UpdateVectorPathSimple(p, allele_path); e != nil {
			log.Fatal(e)
		}
		if len(ctx.CGF.StepPerPath) < len(ain_slice) {
			ctx.CGF.StepPerPath = append(ctx.CGF.StepPerPath, uint64(len(_sglf.Lib[p])))
		}
	}

	// StepPerPath is rebuilt here from the library, replacing the entries
	// appended in the loop above.
	ctx.CGF.PathCount = uint64(len(_cgf.Path))
	ctx.CGF.StepPerPath = make([]uint64, ctx.CGF.PathCount)
	for i := uint64(0); i < ctx.CGF.PathCount; i++ {
		ctx.CGF.StepPerPath[i] = uint64(len(_sglf.Lib[int(i)]))
	}

	ctx.WriteCGF("out.cgf")
}
Ejemplo n.º 5
0
// assemblyFormatError reports a structurally malformed line in an assembly
// file read by load_Assembly.
type assemblyFormatError string

// Error implements the error interface.
func (e assemblyFormatError) Error() string { return string(e) }

// load_Assembly reads the assembly file configured for tagset_pdh and
// assembly_pdh and stores its contents in ctx.
//
// The file name is taken from ctx.Config
// (tagset -> tagset_pdh -> assembly -> assembly_pdh -> gz). The file is read
// line by line:
//
//   - A line starting with '>' is a header of the form "name:chrom:path",
//     where path is hexadecimal. It starts a new path and records chrom in
//     ctx.AssemblyChrom[assembly_pdh][path].
//   - Any other non-empty line must start with a 4-character hexadecimal step
//     followed, from offset 5 on, by a decimal reference position. The
//     position is appended to ctx.Assembly[assembly_pdh][path] for the most
//     recent header's path (path 0 before any header). The step is validated
//     but not stored.
//
// Existing entries for assembly_pdh in ctx.Assembly and ctx.AssemblyChrom are
// replaced. An error is returned when the file cannot be opened, when a
// number fails to parse, or when a line is too short to hold the expected
// fields; ctx may be partially populated in that case.
func load_Assembly(ctx *LanternContext, tagset_pdh, assembly_pdh string) error {
	assembly_fn := ctx.Config.O["tagset"].O[tagset_pdh].O["assembly"].O[assembly_pdh].O["gz"].S
	fp, e := autoio.OpenReadScanner(assembly_fn)
	if e != nil {
		return e
	}
	defer fp.Close()

	log.Printf(">>>> loading assembly: %s\n", assembly_pdh)

	if ctx.Assembly == nil {
		ctx.Assembly = make(map[string]map[int][]int)
	}
	ctx.Assembly[assembly_pdh] = make(map[int][]int)

	if ctx.AssemblyChrom == nil {
		ctx.AssemblyChrom = make(map[string]map[int]string)
	}
	ctx.AssemblyChrom[assembly_pdh] = make(map[int]string)

	path := 0
	line_no := 0
	for fp.ReadScan() {
		l := fp.ReadText()
		line_no++

		if len(l) == 0 || l[0] == '\n' {
			continue
		}

		if l[0] == '>' {
			parts := strings.Split(l[1:], ":")
			// Indexing parts[1] and parts[2] on a short header would panic.
			if len(parts) < 3 {
				return assemblyFormatError("malformed assembly header at line " +
					strconv.Itoa(line_no) + ": expected name:chrom:path")
			}
			chrom := parts[1]

			_path, e := strconv.ParseInt(parts[2], 16, 64)
			if e != nil {
				return e
			}
			path = int(_path)

			ctx.Assembly[assembly_pdh][path] = make([]int, 0, 1024)
			ctx.AssemblyChrom[assembly_pdh][path] = chrom
			continue
		}

		// l[0:4] and l[5:] are sliced below; both need at least 5 bytes.
		if len(l) < 5 {
			return assemblyFormatError("malformed assembly entry at line " +
				strconv.Itoa(line_no) + ": line too short")
		}

		// The step is only checked for being valid hexadecimal.
		if _, e := strconv.ParseInt(l[0:4], 16, 64); e != nil {
			return e
		}

		z := _skip_space(l[5:])
		_ref_pos, e := strconv.ParseInt(z, 10, 64)
		if e != nil {
			return e
		}

		ctx.Assembly[assembly_pdh][path] = append(ctx.Assembly[assembly_pdh][path], int(_ref_pos))
	}

	return nil
}
Ejemplo n.º 6
0
// AddGenomeLibraryCSV reads the CSV genome library file fn and merges its
// tiles into sglf.
//
// Empty lines and lines whose first byte is 0 or '#' are skipped. Every other
// line must hold at least three comma-separated fields:
//
//	<path>.<x>.<step>.<variant>+<span>,<md5>,<sequence>
//
// path, step and variant are parsed as hexadecimal, span as decimal; the
// second dotted component is not used. The sequence must be at least 48 bytes
// long; its first and last 24 bytes are used as prefix and suffix tags.
//
// For each accepted line the sequence is appended to sglf.Lib[path][step],
// the matching SGLFInfo to sglf.LibInfo[path][step], and the SGLFInfo is
// stored in sglf.MD5Lookup under the md5 field. PfxTagLookup and SfxTagLookup
// are filled from the tags as described in the body. Any of the five maps
// that is nil is allocated first; existing contents are kept.
//
// It returns an error if fn cannot be opened or a line is malformed. Line
// numbers in error messages are zero-based. On error sglf keeps whatever was
// added before the failing line.
func (sglf *SGLF) AddGenomeLibraryCSV(fn string) error {

	if sglf.Lib == nil {
		sglf.Lib = make(map[int]map[int][]string)
	}

	if sglf.LibInfo == nil {
		sglf.LibInfo = make(map[int]map[int][]SGLFInfo)
	}

	ain, e := autoio.OpenReadScanner(fn)
	//if e!=nil { return sglf, e }
	if e != nil {
		return e
	}
	defer ain.Close()

	// Incremented before each line is examined, so the first line is 0.
	line_no := -1

	if sglf.MD5Lookup == nil {
		sglf.MD5Lookup = make(map[string]SGLFInfo)
	}

	if sglf.PfxTagLookup == nil {
		sglf.PfxTagLookup = make(map[string]SGLFInfo)
	}

	if sglf.SfxTagLookup == nil {
		sglf.SfxTagLookup = make(map[string]SGLFInfo)
	}

	// State carried from one accepted line to the next. Tags are entered
	// into the lookup tables one line late, while the following line is
	// being processed.
	prev_pfxtag := ""
	prev_sfxtag := ""
	prev_sglf_info := SGLFInfo{}
	prev_tilepath := -1

	// There's a corner case when we're at the last tile and
	// we add the sfx tag to SfxTagLookup.  If one's already
	// added, we can consult the 'can_overwrite' and notice that
	// we shouldn't overwrite it.  If we've added the sfxtag
	// to the SfxTagLookup but then later notice it's the last
	// tile, we can set the 'can_overwrite' entry to false
	// to allow the addition of a future sfxtag.
	//
	// Tags are unique so it might never come up but if
	// some variation at the end induces a run to be like a tag
	// we could run into problems (though we might have other
	// problems as well)
	//
	can_overwrite := make(map[string]bool)

	for ain.ReadScan() {
		line_no++
		l := ain.ReadText()
		if len(l) == 0 {
			continue
		}
		if (l[0] == 0) || (l[0] == '#') {
			continue
		}

		line_parts := strings.Split(l, ",")
		//if len(line_parts)<3 { return sglf, fmt.Errorf("not enough CSV elements on line_no %d", line_no) }
		if len(line_parts) < 3 {
			return fmt.Errorf("not enough CSV elements on line_no %d", line_no)
		}

		// First field: "<tile id>+<span>".
		tileid_span_parts := strings.Split(line_parts[0], "+")
		//if len(tileid_span_parts)!=2 { return sglf, fmt.Errorf("invalid tileid (%s) on line_no %d", line_parts[0], line_no) }
		if len(tileid_span_parts) != 2 {
			return fmt.Errorf("invalid tileid (%s) on line_no %d", line_parts[0], line_no)
		}

		// The tile id has four dot-separated components; index 1 is not read.
		tileid_parts := strings.Split(tileid_span_parts[0], ".")
		//if len(tileid_parts)!=4 { return sglf, fmt.Errorf("invalid tileid (%s) on line_no %d", line_parts[0], line_no) }
		if len(tileid_parts) != 4 {
			return fmt.Errorf("invalid tileid (%s) on line_no %d", line_parts[0], line_no)
		}

		tilepath_l, e := strconv.ParseInt(tileid_parts[0], 16, 64)
		//if e!=nil { return sglf, fmt.Errorf("%v: line_no %d\n", e, line_no) }
		if e != nil {
			return fmt.Errorf("%v: line_no %d\n", e, line_no)
		}

		tilestep_l, e := strconv.ParseInt(tileid_parts[2], 16, 64)
		//if e!=nil { return sglf, fmt.Errorf("%v: line_no %d\n", e, line_no) }
		if e != nil {
			return fmt.Errorf("%v: line_no %d\n", e, line_no)
		}

		tilevar_l, e := strconv.ParseInt(tileid_parts[3], 16, 64)
		//if e!=nil { return sglf, fmt.Errorf("%v: line_no %d\n", e, line_no) }
		if e != nil {
			return fmt.Errorf("%v: line_no %d\n", e, line_no)
		}

		// Unlike the other components, the span is parsed as decimal.
		//tilespan_l,e := strconv.ParseInt(tileid_span_parts[1], 16, 64)
		tilespan_l, e := strconv.ParseInt(tileid_span_parts[1], 10, 64)
		//if e!=nil { return sglf, fmt.Errorf("%v: line_no %d\n", e, line_no) }
		if e != nil {
			return fmt.Errorf("%v: line_no %d\n", e, line_no)
		}

		tilepath := int(tilepath_l)
		tilestep := int(tilestep_l)
		tilevar := int(tilevar_l)
		tilespan := int(tilespan_l)

		md5_str := line_parts[1]
		seq := line_parts[2]

		// 48 bytes are needed so the 24-byte prefix and suffix tags fit.
		//if len(seq) < 48 { return sglf, fmt.Errorf("len(seq)<48: line_no %d", line_no) }
		if len(seq) < 48 {
			return fmt.Errorf("len(seq)<48: line_no %d", line_no)
		}

		pfxtag := seq[:24]
		sfxtag := seq[len(seq)-24:]

		// Lib and LibInfo are kept in step: both get their path and step
		// entries created together.
		if _, ok := sglf.Lib[tilepath]; !ok {
			sglf.Lib[tilepath] = make(map[int][]string)
			sglf.LibInfo[tilepath] = make(map[int][]SGLFInfo)
		}
		if _, ok := sglf.Lib[tilepath][tilestep]; !ok {
			sglf.Lib[tilepath][tilestep] = make([]string, 0, 16)
			sglf.LibInfo[tilepath][tilestep] = make([]SGLFInfo, 0, 16)
		}
		sglf.Lib[tilepath][tilestep] = append(sglf.Lib[tilepath][tilestep], seq)
		sglf.LibInfo[tilepath][tilestep] = append(sglf.LibInfo[tilepath][tilestep], SGLFInfo{Path: tilepath, Step: tilestep, Variant: tilevar, Span: tilespan})

		sglf_info := SGLFInfo{Path: int(tilepath), Step: int(tilestep), Variant: int(tilevar), Span: int(tilespan)}
		sglf.MD5Lookup[md5_str] = sglf_info

		// Register the tags remembered from the previous accepted line.
		if prev_pfxtag != "" {
			sglf.PfxTagLookup[prev_pfxtag] = prev_sglf_info
		}
		if prev_sfxtag != "" {

			if _, ok := can_overwrite[prev_sfxtag]; !ok {

				//DEBUG
				//fmt.Printf(">>> adding prev_sfxtag %s\n", prev_sfxtag)

				// not in map, add it
				//
				sglf.SfxTagLookup[prev_sfxtag] = prev_sglf_info
			} else if can_overwrite[prev_sfxtag] {

				// The position logged here is that of the current line, while
				// the tag and the stored info come from the previous line.
				log.Printf("found suspicious tag (previously seen): '%s' at path.step.variant (%x.%x.%x)",
					prev_sfxtag, tilepath, tilestep, tilevar)

				// otherwise if we can overwrite it, do so
				//
				sglf.SfxTagLookup[prev_sfxtag] = prev_sglf_info

			}

			// Remember we've added this sfxtag
			//
			can_overwrite[prev_sfxtag] = false

		}

		// On a change of path the remembered tags are dropped. If the new
		// path does not start at step 0, the previous suffix tag is marked
		// as overwritable first.
		if prev_tilepath != tilepath {

			if tilestep > 0 {
				can_overwrite[prev_sfxtag] = true
			}

			prev_pfxtag = ""
			prev_sfxtag = ""

			//fmt.Printf("######\n")
		}
		prev_sfxtag = sfxtag

		// The prefix tag of a step-0 tile is never remembered.
		if tilestep > 0 {
			prev_pfxtag = pfxtag
		}

		prev_sglf_info = sglf_info
		prev_tilepath = tilepath

		//fmt.Printf("%d: %x.%x %s (%s) %s (%s)\n", line_no, tilepath, tilestep, prev_pfxtag, pfxtag, prev_sfxtag, sfxtag)

		//fmt.Printf(":%s:\n", l)
		//os.Exit(0)
	}

	// Register the prefix tag still pending from the last accepted line.
	// NOTE(review): the pending suffix tag of that line is not registered
	// here; this appears to be the "last tile" case described above, but
	// confirm it is intended.
	if prev_pfxtag != "" {
		sglf.PfxTagLookup[prev_pfxtag] = prev_sglf_info
	}

	//return sglf, nil
	return nil
}
Ejemplo n.º 7
0
// _main_gff_to_rotini is the CLI handler that converts GFF text into PASTA
// written to stdout.
//
// The GFF lines are read from the first "input" option ("-" is used when no
// input is given) and the reference sequence from the "refstream" option,
// where "-" selects stdin. A non-empty "chrom" option is passed to
// GFFRefVar.Chrom, and a positive "start" option initialises both RefPos and
// PrevRefPos.
//
// Every non-empty input line is passed to GFFRefVar.Pasta. io.EOF from Pasta
// or PastaRefEnd is not treated as an error and does not stop the loop. Any
// other error is reported on stderr with the current 1-based line number and
// ends the run without calling PastaEnd. Failing to open either stream prints
// the error and exits with status 1.
func _main_gff_to_rotini(c *cli.Context) {
	infn_slice := c.StringSlice("input")
	if len(infn_slice) < 1 {
		infn_slice = append(infn_slice, "-")
	}

	ain, err := autoio.OpenReadScanner(infn_slice[0])
	if err != nil {
		fmt.Fprintf(os.Stderr, "%v", err)
		os.Stderr.Sync()
		os.Exit(1)
	}
	defer ain.Close()

	fp := os.Stdin
	if ref_fn := c.String("refstream"); ref_fn != "-" {
		f, err := os.Open(ref_fn)
		if err != nil {
			fmt.Fprintf(os.Stderr, "%v", err)
			os.Stderr.Sync()
			os.Exit(1)
		}
		defer f.Close()
		fp = f
	}
	ref_stream := bufio.NewReader(fp)

	out := bufio.NewWriter(os.Stdout)

	gff := GFFRefVar{}
	gff.Init()

	if chrom := c.String("chrom"); len(chrom) > 0 {
		gff.Chrom(chrom)
	}

	if start := c.Int("start"); start > 0 {
		gff.RefPos = start
		gff.PrevRefPos = gff.RefPos
	}

	line_no := 0
	gff.PastaBegin(out)
	for ain.ReadScan() {
		gff_line := ain.ReadText()
		line_no++

		if gff_line == "" {
			continue
		}
		e := gff.Pasta(gff_line, ref_stream, out)
		if (e != io.EOF) && (e != nil) {
			fmt.Fprintf(os.Stderr, "ERROR: %v at line %v\n", e, line_no)
			return
		}
	}

	var e error = gff.PastaRefEnd(ref_stream, out)
	if (e != io.EOF) && (e != nil) {
		fmt.Fprintf(os.Stderr, "ERROR: GFF PastaRefEnd: %v at line %v\n", e, line_no)
		return
	}

	gff.PastaEnd(out)

	// Push whatever is still sitting in the buffer to stdout, as the gVCF
	// handler does. This is a no-op if PastaEnd already flushed the writer.
	if err := out.Flush(); err != nil {
		fmt.Fprintf(os.Stderr, "ERROR: %v\n", err)
	}
}