Beispiel #1
0
// lexUNASegment lexes the UNA segment. It steps over the segment name,
// emits token.UNA_SEGMENT, and then reads the six runes that follow,
// storing each one in the lexer setting selected by its position. Those
// runes are then emitted as token.UNA_TEXT and lexing continues with
// lexSegment. Hitting eof inside the header returns an error state.
func lexUNASegment(l *lexer) stateFn {
	// number of runes read after the segment name
	const headerRunes = 6

	// step over the segment name itself and emit it
	l.pos += token.Pos(len(UNA_SEGMENT_NAME))
	l.emit(token.UNA_SEGMENT)
	// (a generic l.emit(token.SEGMENT) was disabled here)

	for idx := 0; idx < headerRunes; idx++ {
		ch := l.next()
		if ch == eof {
			return l.errorf("found eof while reading UNA header")
		}

		// route the rune to the setting that lives at this position;
		// the rest of the input is lexed using these values
		switch {
		case idx == COMPONENT_DELIMITER_POS:
			l.componentDelimiter = ch
		case idx == DATA_DELIMITER_POS:
			l.dataDelimiter = ch
		case idx == DECIMAL_POS:
			l.decimal = ch
		case idx == RELEASE_INDICATOR_POS:
			l.releaseIndicator = ch
		case idx == REPETITION_DELIMITER_POS:
			l.repetitionDelimiter = ch
		case idx == SEGMENT_TERMINATOR_POS:
			l.segmentTerminator = ch
		}
	}

	l.emit(token.UNA_TEXT)
	// (a generic l.emit(token.TEXT) was disabled here)

	return lexSegment
}
Beispiel #2
0
// next decodes the rune at the current position, records its byte width
// in l.width, moves l.pos past it, and returns it. When the position is at
// or beyond the end of the input it sets l.width to zero and returns eof
// without moving.
func (l *lexer) next() rune {
	if len(l.input) <= int(l.pos) {
		l.width = 0
		return eof
	}

	rest := l.input[l.pos:]
	ch, size := utf8.DecodeRuneInString(rest)

	step := token.Pos(size)
	l.width = step
	l.pos += step

	return ch
}
Beispiel #3
0
// lexData lexes a data section, which may contain components,
// repetitions, and text.
//
// It consumes runes until it meets a delimiter. The delimiter is then
// backed up over, token.TEXT is emitted, and the state function for that
// delimiter is returned. The rune following a release indicator is
// consumed without being examined. Reaching eof returns an error state.
func lexData(l *lexer) stateFn {
	for {
		switch r := l.next(); {
		case r == l.dataDelimiter:
			l.backup()
			l.emit(token.TEXT)
			return lexDataDelimiter
		case r == l.segmentTerminator:
			// Some senders (relayhealth, for one) do not escape quote
			// characters inside the data even when the quote is also the
			// segment terminator. To cope with that, a quote terminator
			// with more input behind it is checked with a look-ahead: it
			// is absorbed as text when a fourth rune exists after it and
			// the following runes are not "three upper-case runes, then
			// the data delimiter" (i.e. not the start of another
			// segment). If the input ends before that fourth rune, the
			// quote is still treated as a terminator.
			// note: this does not cover senders that fail to escape the
			// other delimiters; that has not been observed so far.
			if l.foundQuote == 0 && isQuote(r) && int(l.pos) < len(l.input) {
				isTerm := true
				p := l.pos

				// test if the next 3 runes are upper case
				for x := 0; x < 3 && int(p) < len(l.input); x++ {
					subr, w := l.at(p)
					isTerm = isTerm && isUpper(subr)
					p += token.Pos(w)
				}

				// check to see if the 4th rune is a data delimiter
				if int(p) < len(l.input) {
					subr, _ := l.at(p)
					isTerm = isTerm && subr == l.dataDelimiter

					if !isTerm {
						l.foundQuote = r
					}
				}
			}

			if l.foundQuote == 0 {
				l.backup()
				l.emit(token.TEXT)
				return lexSegmentTerminator
			}

			// the quote is absorbed as text; clear the marker so the
			// next terminator is examined afresh
			l.foundQuote = 0
		case r == l.releaseIndicator:
			// skip to next character since it is escaped
			l.next()
		case r == l.componentDelimiter:
			l.backup()
			l.emit(token.TEXT)
			return lexComponentDelimiter
		case r == l.repetitionDelimiter:
			l.backup()
			l.emit(token.TEXT)
			return lexRepetitionDelimiter
		case r == eof:
			return l.errorf("found eof while reading data")
		default:
			// absorb
		}
	}
	// No trailing return: the loop above has no condition and no break,
	// so it is a terminating statement and anything after it is
	// unreachable.
}