// parse the UNA segment and retrieve all // our delimiters and settings. func lexUNASegment(l *lexer) stateFn { l.pos += token.Pos(len(UNA_SEGMENT_NAME)) l.emit(token.UNA_SEGMENT) //l.emit(token.SEGMENT) // read the next 6 runes, because they are the // data for the UNA segment that we need // to lex the rest for x := 0; x < 6; x++ { r := l.next() if r == eof { return l.errorf("found eof while reading UNA header") } switch x { case COMPONENT_DELIMITER_POS: l.componentDelimiter = r case DATA_DELIMITER_POS: l.dataDelimiter = r case DECIMAL_POS: l.decimal = r case RELEASE_INDICATOR_POS: l.releaseIndicator = r case REPETITION_DELIMITER_POS: l.repetitionDelimiter = r case SEGMENT_TERMINATOR_POS: l.segmentTerminator = r } } l.emit(token.UNA_TEXT) //l.emit(token.TEXT) return lexSegment }
// next returns the next rune in the input. func (l *lexer) next() rune { if int(l.pos) >= len(l.input) { l.width = 0 return eof } r, w := utf8.DecodeRuneInString(l.input[l.pos:]) l.width = token.Pos(w) l.pos += l.width return r }
// lex a data section. a data section can have // components, repetitions, and texts in it. func lexData(l *lexer) stateFn { for { switch r := l.next(); { case r == l.dataDelimiter: l.backup() l.emit(token.TEXT) return lexDataDelimiter case r == l.segmentTerminator: // now this might sound retarded (because it is) but some // companies (::cough:: relayhealth) do not escape the // quotations inside when it is used as a delimiter also. // what this does is if it detects a quote character, and // this isn't the end of the input or the data following isn't // the start of another segment, then just absorb it as if // it were token.TEXT. // note: this does not cover the case where if they don't // escape other delimiters. but i have not seen this yet. if l.foundQuote == 0 && isQuote(r) && int(l.pos) < len(l.input) { isTerm := true p := l.pos // test if the next 3 runes are upper case for x := 0; x < 3 && int(p) < len(l.input); x++ { subr, w := l.at(p) isTerm = isTerm && isUpper(subr) p += token.Pos(w) } // check to see if the 4th rune is a data delimiter if int(p) < len(l.input) { subr, _ := l.at(p) isTerm = isTerm && subr == l.dataDelimiter if !isTerm { l.foundQuote = r } } } if l.foundQuote == 0 { l.backup() l.emit(token.TEXT) return lexSegmentTerminator } else { // we absorb the quote l.foundQuote = 0 } case r == l.releaseIndicator: // skip to next character since it is escaped l.next() case r == l.componentDelimiter: l.backup() l.emit(token.TEXT) return lexComponentDelimiter case r == l.repetitionDelimiter: l.backup() l.emit(token.TEXT) return lexRepetitionDelimiter case r == eof: return l.errorf("found eof while reading data") default: // absorb } } return lexSegment }