예제 #1
0
파일: dna.go 프로젝트: dbarabanov/rosalind
// SpliceRna obtains a DNA string and its introns from readSplcInput(filename),
// removes every occurrence of every intron from the DNA, and returns the result
// of passing the remaining exons through TranscribeRna and EncodeProtein.
//
// Problem statement: after identifying the exons and introns of an RNA string,
// we only need to delete the introns and concatenate the exons to form a new
// string ready for translation.
//
// Given: a DNA string s (of length at most 1 kbp) and a collection of
// substrings of s acting as introns. All strings are given in FASTA format.
//
// Return: a protein string resulting from transcribing and translating the
// exons of s. (Note: only one solution will exist for the dataset provided.)
//
// Sample dataset:
//
//	>Rosalind_10
//	ATGGTCTACATAGCTGACAAACAGCACGTAGCAATCGGTCGAATCTCGAGAGGCATATGGTCACATGATCGGTCGAGCGTGTTTCAAAGTTTGCGCCTAG
//	>Rosalind_12
//	ATCGGTCGAA
//	>Rosalind_15
//	ATCGGTCGAGCGTGT
//
// Sample output:
//
//	MVYIADKQHVASREAYGHMFKVCA
//
// SpliceRna panics if readSplcInput reports an error.
func SpliceRna(filename string) (protein string) {
	dna, introns, err := readSplcInput(filename)
	if err != nil {
		panic("failed to read input from " + filename)
	}

	// Map each intron start offset in dna to the intron length. The keys of
	// the FindSubstrings result are used as the start offsets.
	st := suffix_tree.ConstructSuffixTree(dna)
	intronOffsets := make(map[int]int)
	for _, intron := range introns {
		for pos := range suffix_tree.FindSubstrings(st, intron) {
			// Several introns may start at the same offset; keep the longest
			// so a shorter one processed later cannot shrink the removed span.
			if len(intron) > intronOffsets[pos] {
				intronOffsets[pos] = len(intron)
			}
		}
	}

	// Append only the kept runes. Sizing the slice to len(dna) up front and
	// converting all of it would leave trailing zero runes in the string
	// whenever at least one intron is removed.
	exons := make([]rune, 0, len(dna))
	intronEnd := 0 // exclusive end of the furthest-reaching intron seen so far
	for i, r := range dna {
		// Overlapping or adjacent introns are merged by extending intronEnd.
		if length, present := intronOffsets[i]; present && i+length > intronEnd {
			intronEnd = i + length
		}
		if i >= intronEnd {
			exons = append(exons, r)
		}
	}

	return EncodeProtein(TranscribeRna(string(exons)))
}
예제 #2
0
파일: dna.go 프로젝트: dbarabanov/rosalind
// RnaSplice removes every occurrence of every intron from a DNA string and
// returns the result of passing the remaining exons through TranscribeRna and
// EncodeProtein.
//
// input is FASTA-like text in which every record occupies exactly two lines: a
// header line followed by a single sequence line. The first record is the DNA
// string and each following record is an intron. Sequences wrapped across
// several lines are not supported. A missing newline after the last line is
// tolerated.
//
// RnaSplice panics if input contains fewer than two lines.
func RnaSplice(input string) (protein string) {
	// Record the byte offset of every line break in input.
	var lineBreaks []int
	for i, r := range input {
		if r == '\n' {
			lineBreaks = util.AppendInt(lineBreaks, i)
		}
	}
	// Treat the end of input as a line break so that a final line without a
	// trailing newline (typically the last intron) is not silently dropped.
	if len(input) > 0 && input[len(input)-1] != '\n' {
		lineBreaks = util.AppendInt(lineBreaks, len(input))
	}
	if len(lineBreaks) < 2 {
		panic("RnaSplice: input needs a header line followed by a DNA line")
	}

	// The second line holds the DNA string.
	dna := input[lineBreaks[0]+1 : lineBreaks[1]]
	st := suffix_tree.ConstructSuffixTree(dna)

	// Map each intron start offset in dna to the intron length. The keys of
	// the FindSubstrings result are used as the start offsets.
	intronOffsets := make(map[int]int)
	lineStart := lineBreaks[1] + 1
	for i, lineBreak := range lineBreaks[2:] {
		// After the DNA line, lines alternate header (even i) and intron (odd i).
		if i%2 == 1 {
			intron := input[lineStart:lineBreak]
			for pos := range suffix_tree.FindSubstrings(st, intron) {
				// Several introns may start at the same offset; keep the
				// longest so a shorter one cannot shrink the removed span.
				if len(intron) > intronOffsets[pos] {
					intronOffsets[pos] = len(intron)
				}
			}
		}
		lineStart = lineBreak + 1
	}

	// Append only the kept runes. Sizing the slice to len(dna) up front and
	// converting all of it would leave trailing zero runes in the string
	// whenever at least one intron is removed.
	exons := make([]rune, 0, len(dna))
	intronEnd := 0 // exclusive end of the furthest-reaching intron seen so far
	for i, r := range dna {
		// Overlapping or adjacent introns are merged by extending intronEnd.
		if length, present := intronOffsets[i]; present && i+length > intronEnd {
			intronEnd = i + length
		}
		if i >= intronEnd {
			exons = append(exons, r)
		}
	}

	return EncodeProtein(TranscribeRna(string(exons)))
}