func (t *ttfParser) ParseName() (err error) { err = t.Seek("name") if err == nil { tableOffset, _ := t.f.Seek(0, os.SEEK_CUR) t.rec.PostScriptName = "" t.Skip(2) // format count := t.ReadUShort() stringOffset := t.ReadUShort() for j := uint16(0); j < count && t.rec.PostScriptName == ""; j++ { t.Skip(3 * 2) // platformID, encodingID, languageID nameID := t.ReadUShort() length := t.ReadUShort() offset := t.ReadUShort() if nameID == 6 { // PostScript name t.f.Seek(int64(tableOffset)+int64(stringOffset)+int64(offset), os.SEEK_SET) var s string s, err = t.ReadStr(int(length)) if err != nil { return } s = strings.Replace(s, "\x00", "", -1) var re *regexp.Regexp if re, err = regexp.Compile("[(){}<> /%[\\]]"); err != nil { return } t.rec.PostScriptName = re.ReplaceAllString(s, "") } } if t.rec.PostScriptName == "" { err = fmt.Errorf("the name PostScript was not found") } } return }
// writeNewFile creates a new file and writes the lines to the file, stripping out the prefix if it exists. // This will return an error if the file already exists, or if there are any errors during creation. // the prefix will be removed if it is the first non-whitespace text in any line func writeNewFile(name string, lines []string, prefix string) error { out, err := createNew(name) if err != nil { return err } var reg *regexp.Regexp if len(prefix) > 0 { reg = regexp.MustCompile(fmt.Sprintf(`^(\s*)%s`, regexp.QuoteMeta(prefix))) } for _, line := range lines { if reg != nil { line = reg.ReplaceAllString(line, fmt.Sprintf(`$1`)) } if _, err := out.Write([]byte(line)); err != nil { if err2 := out.Close(); err2 != nil { return fmt.Errorf("Error writing to and closing newfile %s: %s%s", name, err, err2) } return fmt.Errorf("Error writing to newfile %s: %s", name, err) } } if err := out.Close(); err != nil { return fmt.Errorf("Error closing newfile %s: %s", name, err) } return nil }
// replaceString compiles expr as a regular expression and returns src with
// every match replaced by repl (repl may use $1-style references).
// If expr does not compile, the empty string and the compile error are
// returned.
func replaceString(src string, expr string, repl string) (ret string, err error) {
	reg, compileErr := regexp.Compile(expr)
	if compileErr != nil {
		return "", compileErr
	}
	return reg.ReplaceAllString(src, repl), nil
}
// replaceAll repeatedly deletes every match of re from str until a pass
// leaves the string unchanged, and returns the result. Repeating matters when
// removing one match creates a new one (e.g. nested "()" pairs).
func replaceAll(re *regexp.Regexp, str string) string {
	for {
		next := re.ReplaceAllString(str, "")
		if next == str {
			return str
		}
		str = next
	}
}
// replaceFirst returns s with only the first match of re replaced by
// replacement. The replacement may contain $1-style references, which are
// expanded against the first match. If re does not match, s is returned
// unchanged.
func replaceFirst(re *regexp.Regexp, s string, replacement string) string {
	// Expand the template against the match found in the full string rather
	// than re-running the regexp on the isolated match: context-sensitive
	// assertions (\b, \B, ^, $) can fail on the extracted substring.
	loc := re.FindStringSubmatchIndex(s)
	if loc == nil {
		return s
	}
	buf := make([]byte, 0, len(s)+len(replacement))
	buf = append(buf, s[:loc[0]]...)
	buf = re.ExpandString(buf, replacement, s, loc)
	buf = append(buf, s[loc[1]:]...)
	return string(buf)
}
// Parse sets a operand value to the register operand and // returns the remain string and true. // // If the source is invalid, // This returns the source itself and false. // In this case doesn't change the register operand. func (operand *Single) Parse(source string, byRegex *regexp.Regexp) (string, bool) { matches := byRegex.FindAllString(string(source), 1) if len(matches) <= 0 { return source, false } operand.SingleValue = strings.Trim(matches[0], " \t,") operand.HasValue = true remains := byRegex.ReplaceAllString(string(source), "") if strings.HasSuffix(matches[0], ",") { remains = "," + remains } return remains, true }
// generateRandomString reads 32 random bytes, base64-encodes them and strips
// every non-word character (the '+', '/' and '=' of the standard alphabet),
// so the result is at most 44 alphanumeric characters long.
//
// The process is terminated via log.Fatal if the random source fails.
// NOTE(review): the quality of the randomness depends on which rand package
// this file imports (crypto/rand vs math/rand) — confirm.
func generateRandomString() string {
	b := make([]byte, 32)
	if _, err := rand.Read(b); err != nil {
		// not sure what to do here...
		log.Fatal("couldn't read random bytes...")
	}
	// Constant pattern: MustCompile cannot fail here, and no error value is
	// silently overwritten as in the previous Compile call.
	nonWord := regexp.MustCompile(`[^\w]`)
	return nonWord.ReplaceAllString(base64.StdEncoding.EncodeToString(b), "")
}
/** * 根据URL 规则,替换路径 */ func (this *Server) ReplacePath(path string) string { var r *regexp.Regexp for _, reg := range this.UrlRegulars { r, _ = regexp.Compile(reg.regular) if r.MatchString(path) { path = r.ReplaceAllString(path, reg.to) break } } return path }
// parsePostPhrase splits sentence into tokens around the matches of re.
//
// Every match is first wrapped in U+2980 markers (re must define capture
// group 1, which is what gets re-inserted), then the text is split on that
// marker. Whitespace-only tokens are dropped. Tokens that re matches are kept
// as-is; every other token gets a single space prepended.
// The returned slice is never nil.
func parsePostPhrase(sentence string, re *regexp.Regexp) []string {
	marked := re.ReplaceAllString(sentence, "\u2980$1\u2980")

	output := []string{}
	for _, piece := range strings.Split(marked, "\u2980") {
		if strings.TrimSpace(piece) == "" {
			continue
		}
		if re.MatchString(piece) {
			output = append(output, piece)
		} else {
			output = append(output, " "+piece)
		}
	}
	return output
}
func replaceAllNamesRegex(reg *regexp.Regexp, repl string) Option { return func(cfg *Config) Option { prev := cfg.nameTransform return replaceNameTransform(func(name xml.Name) xml.Name { if prev != nil { name = prev(name) } s := reg.ReplaceAllString(name.Local, repl) if s != name.Local { cfg.debugf("changed %s -> %s", name.Local, s) } name.Local = s return name })(cfg) } }
// sanitise removes every character outside an allowed set from s and returns
// the lower-cased result with any leading or trailing '-' trimmed.
//
// In strict mode only A-Z, a-z and 0-9 survive; otherwise the accented vowels
// é è à ì ò ù are kept as well. The process is terminated via log.Fatal if
// the pattern fails to compile.
func sanitise(s string, strict bool) string {
	pattern := "[^A-Za-z0-9éèàìòù]+"
	if strict {
		pattern = "[^A-Za-z0-9]+"
	}

	reg, err := regexp.Compile(pattern)
	if err != nil {
		log.Fatal(err)
	}

	stripped := reg.ReplaceAllString(s, "")
	return strings.ToLower(strings.Trim(stripped, "-"))
}
// RegexpReplace replaces matches of regex in str with replace.
//
// count limits the number of replacements: a negative count replaces every
// match, zero replaces nothing, and a positive count replaces at most that
// many matches, left to right. The replacement may contain $1-style
// references to capture groups; they are expanded identically whether or not
// a limit is given. A nil regex leaves str unchanged.
func RegexpReplace(str, replace string, regex *regexp.Regexp, count int) string {
	if count == 0 || regex == nil {
		return str
	}
	if count < 0 {
		return regex.ReplaceAllString(str, replace)
	}

	matches := regex.FindAllStringSubmatchIndex(str, count)
	if len(matches) == 0 {
		return str
	}

	buf := make([]byte, 0, len(str))
	last := 0
	for _, m := range matches {
		// Copy the text between the previous match and this one, then the
		// expanded replacement for this match.
		buf = append(buf, str[last:m[0]]...)
		buf = regex.ExpandString(buf, replace, str, m)
		last = m[1]
	}
	buf = append(buf, str[last:]...)
	return string(buf)
}
// sanitizeName returns a sanitized version of rawName: every character that
// is not an ASCII letter or digit is removed and the remainder is
// lower-cased.
//
// An error is returned if the pattern fails to compile or if the sanitized
// result is one character or shorter.
func sanitizeName(rawName string) (string, error) {
	reg, err := regexp.Compile("[^A-Za-z0-9]+")
	if err != nil {
		return "", err
	}

	safe := strings.ToLower(reg.ReplaceAllString(rawName, ""))
	if len(safe) > 1 {
		return safe, nil
	}
	return "", errors.New("Result string is too short.")
}
// SanitizeString replaces separators with - and removes characters listed in the regexp provided from string. Accents, spaces, and all characters not in A-Za-z0-9 are replaced. func SanitizeString(s string, r *regexp.Regexp) string { // Remove any trailing space to avoid ending on - s = strings.Trim(s, " ") // Flatten accents first so that if we remove non-ascii we still get a legible name s = RemoveAccents(s) // Replace certain joining characters with a dash s = separators.ReplaceAllString(s, "-") // Remove all other unrecognised characters - NB we do allow any printable characters s = r.ReplaceAllString(s, "") // Remove any multiple dashes caused by replacements above s = dashes.ReplaceAllString(s, "-") return s }
// formatTimes strips every "YYYY-MM-DDT" date prefix and every 'Z' from
// timeStr, e.g. "2020-01-02T15:04:05Z" becomes "15:04:05".
//
// If the date pattern fails to compile the empty string is returned; if the
// 'Z' pattern fails to compile, the string with only the dates removed is
// returned.
func formatTimes(timeStr string) string {
	datePrefix, err := regexp.Compile("[0-9]{4}-[0-9]{2}-[0-9]{2}T")
	if err != nil {
		return ""
	}
	times := datePrefix.ReplaceAllString(timeStr, "")

	zulu, err := regexp.Compile("Z")
	if err != nil {
		return times
	}
	return zulu.ReplaceAllString(times, "")
}
// sed reads r line by line. Every line that matches rx is printed to standard
// output with all matches replaced by repl; lines that do not match are
// skipped. It reports whether at least one line matched.
func sed(rx *regexp.Regexp, repl string, r io.Reader) bool {
	matched := false
	for s := bufio.NewScanner(r); s.Scan(); {
		line := s.Text()
		if !rx.MatchString(line) {
			continue
		}
		matched = true
		fmt.Println(rx.ReplaceAllString(line, repl))
	}
	return matched
}
// Marshals a part of the EDIFACT. You can pass a slice callback // to be called if a slice is found, so you can use different // delimiters depending on certain factors. // I don't really like passing in delimiterRegexp, but it saves // CPU cycles. Could possibly use cache: https://github.com/pmylund/go-cache func marshalPart(hdr Header, data reflect.Value, delimiter byte, delimiterRegexp *regexp.Regexp, sliceCallback SliceCallback) ([]byte, error) { buf := &bytes.Buffer{} if data.Kind() == reflect.Interface { data = data.Elem() } switch data.Kind() { default: return []byte(""), errors.New(fmt.Sprintf("Unknown data type: %s", data.Kind())) case reflect.String: escapedData := delimiterRegexp.ReplaceAllString(data.String(), string(hdr.ReleaseIndicator())+"$1") buf.WriteString(escapedData) case reflect.Array, reflect.Slice: // Byte slices are special. We treat them just like the string case. if data.Type().Elem().Kind() == reflect.Uint8 { escapedData := delimiterRegexp.ReplaceAll(data.Bytes(), []byte(string(hdr.ReleaseIndicator())+"$1")) buf.Write(escapedData) break } for n := 0; n < data.Len(); n++ { cdata := data.Index(n) if sliceCallback != nil { cbBytes, err := sliceCallback(cdata) if err != nil { return []byte(""), err } buf.Write(cbBytes) } // we don't want to write the delimiter after the last element if n+1 < data.Len() { buf.WriteByte(delimiter) } } } return buf.Bytes(), nil }
func pgKeyReplace(key string, re, bs, ps *regexp.Regexp) string { k := re.ReplaceAllString(key, "_") k = bs.ReplaceAllString(k, "_") k = ps.ReplaceAllString(k, ".") k = strings.Trim(k, "_") // on the off hand chance we get leading or trailing dots k = strings.Trim(k, ".") // finally, if converting search query syntax, convert all _ to '.'. // This may need to be revisited in more detail if we find ourselves // needing more finesse with escaping underscores. if config.Config.ConvertSearch { k = strings.Replace(k, "_", ".", -1) k = ps.ReplaceAllString(k, ".") } return k }
// unpackStrCol decodes one saved string column from dec and loads its values
// into the records of this block.
//
// The column's string table is merged into the block-level column info,
// optionally rewriting each string through the replacement configured in
// OPTS.STR_REPLACEMENTS for this column name. Every record that has a value
// in this column then gets its Strs and Populated entries set. Decode errors
// and columns missing from the key table are logged and the column is
// skipped. The info parameter is not used in this function.
func (tb *TableBlock) unpackStrCol(dec *gob.Decoder, info SavedColumnInfo) {
	records := tb.RecordList[:]

	into := &SavedStrColumn{}
	err := dec.Decode(into)
	if err != nil {
		log.Println("DECODE COL ERR:", err)
		return
	}

	string_lookup := make(map[int32]string)
	key_table_len := len(tb.table.KeyTable)
	col_id := tb.table.get_key_id(into.Name)

	// Skip columns whose id falls outside the table's key table.
	if int(col_id) >= key_table_len {
		log.Println("IGNORING COLUMN", into.Name, "SINCE ITS NOT IN KEY TABLE IN BLOCK", tb.Name)
		return
	}

	col := tb.GetColumnInfo(col_id)
	// unpack the string table

	// Run our replacements!
	str_replace, ok := OPTS.STR_REPLACEMENTS[into.Name]
	// bucket_replace maps a string id from the saved column to the id that
	// should be stored for it after replacement.
	bucket_replace := make(map[int32]int32)
	var re *regexp.Regexp
	if ok {
		// NOTE(review): a compile error is not checked here; re stays nil in
		// that case and the replacement is silently skipped.
		re, err = regexp.Compile(str_replace.pattern)
	}

	for k, v := range into.StringTable {
		var nv = v
		if re != nil {
			nv = re.ReplaceAllString(v, str_replace.replace)
		}

		existing_key, exists := col.StringTable[nv]

		v = nv

		if exists {
			// The (possibly rewritten) string is already known to the
			// column: reuse its id.
			bucket_replace[int32(k)] = existing_key
		} else {
			bucket_replace[int32(k)] = int32(k)
			col.StringTable[v] = int32(k)
		}

		string_lookup[int32(k)] = v
	}

	col.val_string_id_lookup = string_lookup

	is_path_col := false
	if FLAGS.PATH_KEY != nil {
		is_path_col = into.Name == *FLAGS.PATH_KEY
	}

	var record *Record
	var r uint32

	if into.BucketEncoded {
		// Bucket encoding: each bin holds one value and the list of record
		// ids that carry it.
		prev := uint32(0)
		did := into.DeltaEncodedIDs

		for _, bucket := range into.Bins {
			prev = 0
			value := bucket.Value
			new_value, should_replace := bucket_replace[value]
			if should_replace {
				value = new_value
			}

			// NOTE(review): value is never read after the assignment above,
			// and new_value is the zero value when the lookup misses —
			// presumably every bucket value has a bucket_replace entry;
			// confirm, otherwise StrField(value) looks like the intent.
			cast_value := StrField(new_value)
			for _, r = range bucket.Records {

				// With delta-encoded ids each entry is an offset from the
				// previous record id within the bucket.
				if did {
					r = prev + r
				}

				prev = r
				record = records[r]

				if DEBUG_RECORD_CONSISTENCY {
					if record.Populated[col_id] != _NO_VAL {
						log.Fatal("OVERWRITING RECORD VALUE", record, into.Name, col_id, bucket.Value)
					}
				}

				records[r].Populated[col_id] = STR_VAL
				records[r].Strs[col_id] = cast_value

				if is_path_col {
					record.Path = string_lookup[new_value]
				}
			}
		}

	} else {
		// Flat encoding: into.Values holds one string id per record index.
		for r, v := range into.Values {
			new_value, should_replace := bucket_replace[v]
			if should_replace {
				v = new_value
			}

			records[r].Strs[col_id] = StrField(v)
			records[r].Populated[col_id] = STR_VAL
		}
	}
}
// ReplaceBy replace a line by r. func (source *Source) ReplaceBy(r *regexp.Regexp, replaced string) string { old := source.line source.line = r.ReplaceAllString(source.line, replaced) return old }
func (p NumericParser) parse(s string) (*Numeric, error) { var ( n *Numeric err error sign string reStr string re *regexp.Regexp parseErr = errors.New(ParseNumericError) ) // Record whether the input string has a currency symbol. // If so, it can only be a monetary value. hasCurrency := p.currencyRegex.MatchString(s) if hasCurrency { s = p.removeCurrencySymbol(s) } // Now determine whether the string's initial character is a + or -. // If so, strip it away and record the sign. sign = "" re = regexp.MustCompile("^[\\+-]") if re.MatchString(s) { if re.FindString(s) == "-" { sign = "-" } s = s[1:] } // Since currency and sign symbols have been stripped, we now check that the // expression begins with a decimal separator (possibly) and digit. // Valid strings thus look like either: .x* or x*. reStr = "^" + p.decimalReStr + "?" + "[0-9]" re = regexp.MustCompile(reStr) if !re.MatchString(s) { return nil, parseErr } // Prepend a 0 if the string begins with a decimal separator. reStr = "^" + p.decimalReStr re = regexp.MustCompile(reStr) if re.MatchString(s) { s = "0" + s } // If the input ends with the decimal separator, remove it. re = regexp.MustCompile(p.decimalReStr + "$") if re.MatchString(s) { s = re.ReplaceAllString(s, "") } // Create the main validating regex. reStr = "^\\d+" + "(" + p.digitReStr + "\\d{3})*" + p.decimalReStr + "?\\d*$" re = regexp.MustCompile(reStr) if !re.MatchString(s) { return nil, parseErr } // We can now assume that the string is valid except for // intermediate delimiters. // Before attempting to parse the string further, we (possibly) perform // some basic sanitization. var parsed string tmp, err := p.sanitize(s) if err == nil { parsed = tmp } else { // Probably the parser cannot distinguish between decimal and digit // separators. So we handle this case separately. re = regexp.MustCompile(p.digitReStr + "|" + p.decimalReStr) locs := re.FindAllStringSubmatchIndex(s, -1) switch len(locs) { case 0: // The number is an integer. 
No additional parsing needed. parsed = s err = nil case 1: // Need to deal with 1,234 vs 123,456 vs 12.345, etc. parsed, err = p.parseOneUnknownSeparator(s, locs[0][0]) default: // Try to find the last separator and determine its type. parsed, err = p.parseManyUnknownSeparators(s, locs) } } parsed = sign + parsed f, ferr := strconv.ParseFloat(parsed, 64) if err != nil || ferr != nil { return nil, err } // We now know that the parsed string correctly parses as a float. n = &Numeric{ isFloat: true, f: f, } if hasCurrency { n.isMoney = true } _, err = strconv.Atoi(parsed) if err == nil { n.isInt = true } return n, nil }
// Parses the text and returns a parse tree. func Parse(text string) (listnode *ListNode, err error) { tree := &Tree{ Root: newList(), lex: lex("", text), } // these are two regexps to help us in removing the // release indicator from text and replacing it if // necessary and appropriate // -- // this first regex's job is to take any release indicator // that is not paired with a delimiter, and replace it with // a space. now this is not in the spec at all but I have seen // this in the wild (relayhealth) where they will use the release // indicator as a space. Such as: "CVS?PHARMACY" var releaseRegex1 *regexp.Regexp // this regex will simply just remove the release indicator // wherever it is paired with a delimiter. Such as: // "??" -> "?" and "?^_?^" -> "^_^" var releaseRegex2 *regexp.Regexp LOOP: for { tok := tree.next() switch tok.Typ { case token.EOF: break LOOP case token.ERROR: return nil, errors.New(tok.Val) case token.SEGMENT_TERMINATOR: // If we get a segment terminator, then append it // to our root and clear the stack. seg := newSegment() seg.List.Nodes = append(seg.List.Nodes, tree.stack...) tree.Root.append(seg) tree.stack.clear() case token.UNA_SEGMENT: tree.stack.push(newText(tok.Val)) case token.UNA_TEXT: hdr := newHeader() hdr.SegmentName = tree.stack.last() hdr.Text = newText(tok.Val) tree.Root.append(hdr) tree.stack.clear() // at this point our lex parsed all the delimiters. // so we can create our release regexps. // the %%s will get replaced later for the regex's // specific purpose. // i use QuoteMeta here just in case our delimiters // conflict with the regexp. 
baseRegStr := fmt.Sprintf(`%s([%%s%s])`, regexp.QuoteMeta(string(tree.lex.releaseIndicator)), regexp.QuoteMeta(fmt.Sprintf("%c%c%c%c%c", tree.lex.componentDelimiter, tree.lex.dataDelimiter, tree.lex.releaseIndicator, tree.lex.repetitionDelimiter, tree.lex.segmentTerminator))) releaseRegex1, err = regexp.Compile(fmt.Sprintf(baseRegStr, "^")) if err != nil { return nil, err } releaseRegex2, err = regexp.Compile(fmt.Sprintf(baseRegStr, "")) if err != nil { return nil, err } case token.TEXT: // explanation of this is commented by the regexp declarations tok.Val = releaseRegex1.ReplaceAllString(tok.Val, " $1") tok.Val = releaseRegex2.ReplaceAllString(tok.Val, "$1") fallthrough default: // if addToStack is true, then we push the text onto // the stack. addToStack := true if tree.stack.len() > 0 { lastnode := tree.stack.last() // Try to find a reduce function in our table for // the given last node on the stack and the lookahead // token. if tokMap, ok := REDUCETABLE[lastnode.Type()]; ok { if reducefn, ok := tokMap[tok.Typ]; ok { // we don't add this to stack since we found a // reduce function to handle it addToStack = false reducedNode, err := reducefn(lastnode, tok) if err != nil { return nil, err } // replace the last node of the stack with our // reduced node. tree.stack.setLast(reducedNode) } } } // add the text to the stack if we didn't find a reduce // function and this is a segment or text. if addToStack && (tok.Typ == token.SEGMENT || tok.Typ == token.TEXT) { tree.stack.push(newText(tok.Val)) } } } return tree.Root, nil }