// parseDateFormat splits a date format string into its numeric fields.
// The string must begin and end with a digit, and fields are separated
// by single non-digit characters; nil is returned for malformed input.
func parseDateFormat(format string) []string {
	format = strings.TrimSpace(format)

	var fields []string
	fieldStart := 0
	for pos := 0; pos < len(format); pos++ {
		isDigit := unicode.IsNumber(rune(format[pos]))

		// The first and last characters must both be digits.
		if pos == 0 || pos == len(format)-1 {
			if !isDigit {
				return nil
			}
			continue
		}

		if isDigit {
			continue
		}
		// A separator is exactly one non-digit character; two in a row
		// (previous char also a non-digit) makes the format invalid.
		if !unicode.IsNumber(rune(format[pos-1])) {
			return nil
		}
		fields = append(fields, format[fieldStart:pos])
		fieldStart = pos + 1
	}
	return append(fields, format[fieldStart:])
}
// isFloatNumber reports whether s looks like a non-negative decimal
// number: digits with at most one '.', where a number starting with
// '0' must be immediately followed by '.' (e.g. "0.5", never "01").
func isFloatNumber(s string) bool {
	if len(s) == 0 || !unicode.IsNumber(rune(s[0])) {
		return false
	}
	// A leading zero is only valid as "0.xxx". The original code indexed
	// s[1] unconditionally here, panicking on the one-character input "0".
	if s[0] == '0' && (len(s) < 2 || s[1] != '.') {
		return false
	}
	seenDot := false
	for _, r := range s {
		if r == '.' {
			// At most one decimal point is allowed.
			if seenDot {
				return false
			}
			seenDot = true
			continue
		}
		if !unicode.IsNumber(r) {
			return false
		}
	}
	return true
}
func CheckRuneType(last, current rune) index.RuneType { if isTermSep(current) { return index.TokenSep } if current > 128 { return index.TokenStart } if unicode.IsLetter(current) { if unicode.IsLetter(last) { return index.TokenBody } return index.TokenStart } if unicode.IsNumber(current) { if unicode.IsNumber(last) { return index.TokenBody } return index.TokenStart } return index.TokenStart }
// get_identifier consumes optional leading spaces/tabs plus one lexeme
// from s — either a [A-Za-z0-9_]+ run or a single other character —
// pushes the trimmed lexeme onto sk, and returns the remaining input rs
// and a status ri:
//
//	 1 — s was empty (nothing consumed, nothing pushed)
//	-1 — lexeme is empty, a C type/storage keyword, or starts with a
//	     character that cannot begin an identifier
//	 0 — a usable identifier was consumed
func get_identifier(s string, sk *Stack) (rs string, ri int) {
	i := 0
	if s == "" {
		ri = 1
		return
	}
	iLen := len(s)
	// fmt.Printf("@@@%s@@@\n", s)
	con := true
	// Scan: skip blanks, then greedily take one alphanumeric/underscore
	// run; any other character is consumed alone and ends the scan.
	for i < iLen && con {
		switch {
		case unicode.IsLetter(rune(s[i])), unicode.IsNumber(rune(s[i])), s[i] == '_':
			i++
			for i < iLen {
				if unicode.IsLetter(rune(s[i])) || unicode.IsNumber(rune(s[i])) || s[i] == '_' {
					i++
				} else {
					break
				}
			}
			con = false
		case s[i] == ' ', s[i] == '\t':
			i++
		default:
			con = false
			i++
		}
	}
	// time.Sleep(time.Second)
	ts := s[:i]
	s1 := strings.Trim(ts, " ")
	s = s[i:]
	// The lexeme is pushed even when it is rejected below.
	sk.Push(s1)
	rs = s
	switch s1 {
	case "":
		ri = -1
	// C type and qualifier keywords are not identifiers.
	case "char", "short", "int", "long", "float", "double", "auto", "signed", "unsigned", "const", "volatile", "static", "enum", "struct", "union", "void":
		ri = -1
	default:
		// Identifiers must start with a letter or underscore.
		if !unicode.IsLetter(rune(s1[0])) && s1[0] != '_' {
			ri = -1
		} else {
			ri = 0
		}
	}
	return
	// fmt.Printf("<<<%d|%s|%s|%d>>>\n", iLen, ts[:i], ts[i:], i)
}
// readDeviceValues reads values from a single blkio file. // It expects to read values like "245:1 read 18880" or "254:1 1909". It returns // an array containing an entry for each valid line read. func readBlkioValues(path ...string) ([]blkioValue, error) { f, err := os.Open(filepath.Join(path...)) if err != nil { if os.IsNotExist(err) { return nil, nil } return nil, err } defer f.Close() var values []blkioValue sc := bufio.NewScanner(f) for sc.Scan() { line := strings.TrimSpace(sc.Text()) if len(line) == 0 { continue } // Valid lines start with a device ID. if !unicode.IsNumber(rune(line[0])) { continue } v, err := parseBlkioValue(sc.Text()) if err != nil { return nil, err } values = append(values, v) } return values, nil }
// CharType returns a string naming the Unicode category of r. The
// checks run in a fixed priority order (letter first, graphic last), so
// a rune in several categories reports the earliest match.
func CharType(r rune) string {
	checks := []struct {
		name string
		is   func(rune) bool
	}{
		{"letter", unicode.IsLetter},
		{"space", unicode.IsSpace},
		{"punct", unicode.IsPunct},
		{"number", unicode.IsNumber},
		{"symbol", unicode.IsSymbol},
		{"mark", unicode.IsMark},
		{"digit", unicode.IsDigit},
		{"print", unicode.IsPrint},
		{"control", unicode.IsControl},
		{"graphic", unicode.IsGraphic},
	}
	for _, c := range checks {
		if c.is(r) {
			return c.name
		}
	}
	return "invalid"
}
// Escape escapes the given data to make sure it is safe to use as a
// filename. Spaces and other separator characters are replaced with
// '-', runs of '-' are collapsed, and leading/trailing '-' removed.
// It returns an error if the escaped string ends up empty.
func Escape(name string) (string, error) {
	mapper := func(r rune) rune {
		switch {
		case unicode.IsLetter(r), unicode.IsNumber(r):
			return r
		case unicode.IsSpace(r), unicode.IsPunct(r):
			return '-'
		default:
			return -1 // drop everything else
		}
	}

	out := strings.Map(mapper, html.UnescapeString(name))
	// Collapse any run of dashes down to a single one.
	for strings.Contains(out, "--") {
		out = strings.Replace(out, "--", "-", -1)
	}
	out = strings.TrimSuffix(strings.TrimPrefix(out, "-"), "-")

	if out == "" {
		return out, errors.New("couldn't escape title")
	}
	return out, nil
}
func validateId(id string, allowEmpty bool, idName string, errors map[string][]error) (newId string, err error) { if idName == "" { idName = "id" } newId = strings.TrimSpace(id) if newId == "" { if !allowEmpty { err = ERR_MUST_SPECIFY_ID } } if err == nil { for _, rune := range newId { switch { case unicode.IsLetter(rune), unicode.IsNumber(rune): default: switch rune { case '/', '-', '_', '@', '.': default: err = ERR_INVALID_ID } } if err != nil { break } } } if err != nil && errors != nil { errors[idName] = []error{err} } return }
//Atbash implements atbash cypher func Atbash(s string) (cypher string) { s = strings.ToLower(s) // s = strings.Replace(s, " ", "", -1) var code []string var block int for _, v := range s { if unicode.IsLetter(v) || unicode.IsNumber(v) { if block == 5 { code = append(code, DELIMITER) block = 0 } if unicode.IsLetter(v) { index := v - 'a' cypherIndex := (('z' - 'a') - index) code = append(code, string(cypherIndex+'a')) } else { code = append(code, string(v)) } block++ } } cypher = strings.Join(code, "") return }
// CharCount scans a *bufio.Reader and returns a map of the counts of its // Unicode character types. func CharCount(in *bufio.Reader) map[string]int { counts := make(map[string]int) // counts of Unicode character types for { r, n, err := in.ReadRune() // returns rune, nbytes, error if err == io.EOF { break } if err != nil { fmt.Fprintf(os.Stderr, "charcount: %v\n", err) os.Exit(1) } switch { case r == unicode.ReplacementChar && n == 1: counts["invalid"]++ case unicode.IsControl(r): counts["control"]++ case unicode.IsLetter(r): counts["letter"]++ case unicode.IsMark(r): counts["mark"]++ case unicode.IsNumber(r): counts["number"]++ case unicode.IsPunct(r): counts["punct"]++ case unicode.IsSpace(r): counts["space"]++ case unicode.IsSymbol(r): counts["symbol"]++ } } return counts }
// parse12HourClock converts a 12-hour clock string like "3pm" or "12am"
// into a time.Time on today's date in the local time zone.
//
// Bug fixed: the original mapped "12am" to noon and "12pm" to hour 24
// (midnight of the following day, after time.Date normalization). By
// 12-hour convention 12am is 00:00 and 12pm is 12:00.
func parse12HourClock(word string) (time.Time, error) {
	lower := strings.ToLower(word)
	now := time.Now().In(time.Local)

	for start := 0; start < len(lower); {
		r, width := utf8.DecodeRuneInString(lower[start:])
		if unicode.IsNumber(r) {
			start += width
			continue
		}

		// First non-digit: everything before it is the hour, the rest
		// must be the meridiem suffix.
		hour, err := strconv.Atoi(lower[:start])
		if err != nil || hour > 12 || hour < 0 {
			return time.Now(), fmt.Errorf("Wrong hour: %v", word)
		}
		// 12-hour convention: 12am is midnight, 12pm is noon.
		if hour == 12 {
			hour = 0
		}
		switch lower[start:] {
		case "am":
		case "pm":
			hour += 12
		default:
			return time.Now(), fmt.Errorf("Unsupported 12 hour clock notation: %v", word)
		}
		return time.Date(now.Year(), now.Month(), now.Day(), hour, 0, 0, 0, time.Local), nil
	}

	// All digits (or empty): as in the original, the hour stays zero.
	return time.Date(now.Year(), now.Month(), now.Day(), 0, 0, 0, 0, time.Local), nil
}
func (this *Scanner) scanWord(token *TokenType, lexeme *string, tabIndex *int) { var char = this.lookahead for char != endOfFile && (unicode.IsLetter(char) || unicode.IsNumber(char)) { *lexeme += string(char) char = this.getc() } //Put back last invalid character this.ungetc(char) //Set lookahead for next lexeme this.lookahead = this.firstChar() //Finally check the symbol table for correctness /* * If the lexeme is already in the symbol table, * return its tokenclass. If it isn't, it must * be an identifier whose type we do not know yet. */ if this.St.Installname(*lexeme, tabIndex) { *token = this.St.gettok_class(*tabIndex) } else { this.St.Setattrib(*tabIndex, Stunknown, Tokidentifier) *token = Tokidentifier } }
func (x *exprLex) lexId(yylval *exprSymType) int { var b bytes.Buffer for { c := x.next() if c == lexEOF { break } // If this isn't a character we want in an ID, return out. // One day we should make this a regexp. if c != '_' && c != '-' && c != '.' && c != '*' && !unicode.IsLetter(c) && !unicode.IsNumber(c) { x.backup() break } if _, err := b.WriteRune(c); err != nil { log.Printf("ERR: %s", err) return lexEOF } } yylval.str = b.String() return IDENTIFIER }
// valid_identifier_rune reports whether r may appear in an identifier:
// letters, numbers, '-' and '_'.
//
// The condition is returned directly — the original's
// if-true-else-false form and its no-op rune('-') conversions were
// redundant.
func valid_identifier_rune(r rune) bool {
	return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '-' || r == '_'
}
func (self *Tokenizer) readNumber() (token int, lit string) { buffer := make([]rune, 0, 1) isFloat := false sawDecimal := false firstChar := true for !self.isEof() { ch := rune(self.CurrentCh) if ch == '.' && !sawDecimal { isFloat = true sawDecimal = true buffer = append(buffer, self.CurrentCh) self.Advance() } else if firstChar && ch == '-' { buffer = append(buffer, self.CurrentCh) self.Advance() } else if unicode.IsNumber(ch) { buffer = append(buffer, self.CurrentCh) self.Advance() } else { break } firstChar = false } lit = string(buffer) if isFloat { token = FLOAT } else { token = NUMBER } return }
func FactPointer(val string) (key string, start, end int) { // A pointer looks like *key or *{key with optional spaces} // In the former case key must be alphanumeric if start = strings.Index(val, "*"); start == -1 || start+1 == len(val) { return "", -1, -1 } if val[start+1] == '{' { end = strings.Index(val[start:], "}") + start + 1 // TrimSpace since it's not possible to have a fact key that // starts/ends with a space, but someone *could* write *{ foo } key = strings.ToLower(strings.TrimSpace(val[start+2 : end-1])) } else { // util.Lexer helps find the next char that isn't alphabetical l := &Lexer{Input: val} l.Pos(start + 1) key = strings.ToLower(l.Scan(func(r rune) bool { if unicode.IsLetter(r) || unicode.IsNumber(r) { return true } return false })) end = l.Pos() // Special case handling because *pointer might be *emphasis* // perlfu's designer has a lot to answer for :-/ if l.Peek() == '*' { return "", -1, -1 } } return }
// splitTextToWords splits text into tokens: contiguous runs of
// single- or two-byte letters/digits (e.g. Latin alphanumerics, not
// CJK) are grouped into one token and passed through toLower, while
// every other rune becomes its own token.
func splitTextToWords(text Text) []Text {
	output := make([]Text, 0, len(text)/8)
	current := 0
	inAlphanumeric := true
	alphanumericStart := 0
	for current < len(text) {
		r, size := utf8.DecodeRune(text[current:])
		if size <= 2 && (unicode.IsLetter(r) || unicode.IsNumber(r)) {
			// Current rune is a Latin letter or digit (not CJK):
			// start or extend an alphanumeric run.
			if !inAlphanumeric {
				alphanumericStart = current
				inAlphanumeric = true
			}
		} else {
			// Any other rune first flushes the pending alphanumeric
			// run, then is emitted as a standalone token.
			if inAlphanumeric {
				inAlphanumeric = false
				if current != 0 {
					output = append(output, toLower(text[alphanumericStart:current]))
				}
			}
			output = append(output, text[current:current+size])
		}
		current += size
	}
	// Handle the case where the text ends in an alphanumeric run.
	if inAlphanumeric {
		if current != 0 {
			output = append(output, toLower(text[alphanumericStart:current]))
		}
	}
	return output
}
// normalize converts name into a slug: every maximal run of
// non-alphanumeric runes becomes a single '-' separator.
func normalize(name string) string {
	isSep := func(r rune) bool {
		return !(unicode.IsLetter(r) || unicode.IsNumber(r))
	}
	return strings.Join(strings.FieldsFunc(name, isSep), "-")
}
// Stat calculates statistics for all runes read from r. func (m *Main) Stat(r io.RuneReader) (Stats, error) { var stats Stats for { // Read next character. ch, sz, err := r.ReadRune() if err == io.EOF { break } else if err != nil { return stats, err } // Calculate stats. stats.TotalN++ if unicode.IsControl(ch) { stats.ControlN++ } if unicode.IsDigit(ch) { stats.DigitN++ } if unicode.IsGraphic(ch) { stats.GraphicN++ } if unicode.IsLetter(ch) { stats.LetterN++ } if unicode.IsLower(ch) { stats.LowerN++ } if unicode.IsMark(ch) { stats.MarkN++ } if unicode.IsNumber(ch) { stats.NumberN++ } if unicode.IsPrint(ch) { stats.PrintN++ } if unicode.IsPunct(ch) { stats.PunctN++ } if unicode.IsSpace(ch) { stats.SpaceN++ } if unicode.IsSymbol(ch) { stats.SymbolN++ } if unicode.IsTitle(ch) { stats.TitleN++ } if unicode.IsUpper(ch) { stats.UpperN++ } if sz > 1 { stats.MultiByteN++ } } return stats, nil }
func (x *parserLex) lexId(yylval *parserSymType) int { var b bytes.Buffer for { c := x.next() if c == lexEOF { break } // If this isn't a character we want in an ID, return out. // One day we should make this a regexp. if c != '_' && c != '-' && c != '.' && c != '*' && !unicode.IsLetter(c) && !unicode.IsNumber(c) { x.backup() break } if _, err := b.WriteRune(c); err != nil { x.Error(err.Error()) return lexEOF } } yylval.token = &parserToken{Value: b.String()} return IDENTIFIER }
func makeGoIdentifier(in string) string { d.Chk.NotEmpty(in, "Cannot convert empty string to legal Go identifier.") if r, _ := utf8.DecodeRuneInString(in); unicode.IsNumber(r) { in = "_" + in } return illegalRune.ReplaceAllLiteralString(in, "_") }
func Tokenize(buf *bytes.Buffer) []interface{} { if buf.Len() == 0 { return nil } r := []interface{}{} acc := "" for buf.Len() > 0 { c := rune(buf.Next(1)[0]) if unicode.IsLetter(c) || unicode.IsNumber(c) || c == '"' { acc += string(c) } else if c == '(' { var nested []interface{} = Tokenize(buf) r = append(r, nested) } else if c == ')' { break } else { if acc != "" { r = append(r, acc) } acc = "" } } if acc != "" { r = append(r, acc) } return r }
func incrementCount(r rune, counts map[int]int) { switch { case unicode.IsControl(r): counts[isControl]++ case unicode.IsNumber(r): counts[isNumber]++ case unicode.IsDigit(r): counts[isDigit]++ case unicode.IsLetter(r): counts[isLetter]++ case unicode.IsMark(r): counts[isMark]++ case unicode.IsPunct(r): counts[isPunct]++ case unicode.IsSpace(r): counts[isSpace]++ case unicode.IsSymbol(r): counts[isSymbol]++ case unicode.IsPrint(r): counts[isPrint]++ case unicode.IsGraphic(r): counts[isGraphic]++ } }
// validateHarborPassword reports whether password contains at least one
// digit, one uppercase and one lowercase letter, no spaces, and at
// least seven counted characters (spaces are not counted).
func validateHarborPassword(password string) bool {
	var hasNumber, hasUpper, hasLower bool
	noSpace := true
	length := 0
	for _, r := range password {
		switch {
		case unicode.IsNumber(r):
			hasNumber = true
		case unicode.IsUpper(r):
			hasUpper = true
		case unicode.IsLower(r):
			hasLower = true
		case r == ' ':
			noSpace = false
			continue // spaces don't count toward the length
		}
		length++
	}
	return noSpace && hasNumber && hasUpper && hasLower && length >= 7
}
// our simplified version of MapReduce does not supply a // key to the Map function, as in the paper; only a value, // which is a part of the input file content. the return // value should be a list of key/value pairs, each represented // by a mapreduce.KeyValue. func Map(value string) *list.List { f := func(c rune) bool { return !unicode.IsLetter(c) && !unicode.IsNumber(c) } s := strings.FieldsFunc(value, f) l := list.New() m := make(map[string]string) for _, k := range s { if v, exsits := m[k]; exsits != false { v += "1" m[k] = v } else { m[k] = "1" } } for k, v := range m { //fmt.Println(v)//XXX for test kv := mapreduce.NewKeyValue(k, v) l.PushBack(kv) } return l }
// baseで指定した数値をもとに、sで記述された値をパースし返します。 // baseを使うのは、s内で単位として"%"が使われた場合のみです。 // 単位に"px"が使われた場合と単位がない場合は、 // 単位を省いた数字の部分を数値に変換して返します。 func parseRelSize(base int, s string) (int, error) { i := strings.IndexFunc(s, func(c rune) bool { // TODO: cが数字の場合はfalse、そうでない場合はtrueを返す。 // なお、iにはここがtrueになった箇所(インデックス)が入る。 // ヒント:unicodeパッケージのドキュメントを見てみよう。 return !unicode.IsNumber(c) }) // TODO: 数字のみだった場合は、単位なしの数値のみとし、 // sをint型に変換して返す。 // ヒント:stringsパッケージのドキュメントを見て、struct.IndexFuncの戻り値を調べよう。 if i < 0 { return strconv.Atoi(s) } // TODO:sのうち、数字だけの部分をint型に変換する。 v, err := strconv.Atoi(s[:i]) if err != nil { return 0, ErrInvalidSize } switch s[i:] { // TODO: "%"が指定された場合は、baseを100%として値を計算する。 case "%": return int(float64(base) * float64(v) / 100), nil case "px": return v, nil default: // TODO: "%"と"px"以外の単位が指定された場合は、ErrUnkownUnitエラーを返す。 return 0, ErrUnkownUnit } }
// ExampleFieldsFunc demonstrates bytes.FieldsFunc splitting on every
// rune that is neither a letter nor a number.
func ExampleFieldsFunc() {
	notAlnum := func(c rune) bool {
		return !(unicode.IsLetter(c) || unicode.IsNumber(c))
	}
	fmt.Printf("Fields are: %q", bytes.FieldsFunc([]byte("  foo1;bar2,baz3..."), notAlnum))
	// Output: Fields are: ["foo1" "bar2" "baz3"]
}
func TestRune_IsIndependent(t *testing.T) { numbers := make([]rune, 0) letters := make([]rune, 0) marks := make([]rune, 0) symbols := make([]rune, 0) puncts := make([]rune, 0) others := make([]rune, 0) for _, r := range unicode.Myanmar.R16 { for c := r.Lo; c <= r.Hi; c++ { switch mr := rune(c); true { case unicode.IsLetter(mr): letters = append(letters, mr) case unicode.IsNumber(mr): numbers = append(numbers, mr) case unicode.IsMark(mr): marks = append(marks, mr) case unicode.IsPunct(mr): puncts = append(puncts, mr) case unicode.IsSymbol(mr): symbols = append(symbols, mr) default: others = append(others, mr) } } } independents := string(letters) + string(numbers) + string(puncts) + " \t\r\n" for _, consonant := range independents { if ok, _ := Rune(consonant).IsIndependent(); !ok { t.Errorf("[%U] expected result is true, but it returns false", consonant) } } }
// getTokens converts a string into a slice of tokens: contiguous runs
// of letters/digits stay together, every other non-space rune becomes
// its own single-rune element, and whitespace only separates tokens
// (it is never emitted).
func getTokens(value string) []string {
	var result []string
	var run []rune

	flush := func() {
		if len(run) > 0 {
			result = append(result, string(run))
			run = nil
		}
	}

	runes := []rune(value)
	for i, r := range runes {
		switch {
		case unicode.IsSpace(r):
			flush()
		case unicode.IsLetter(r) || unicode.IsNumber(r) || unicode.IsDigit(r):
			run = append(run, r)
			if i == len(runes)-1 {
				flush()
			}
		default:
			// Symbols and punctuation are emitted individually.
			flush()
			result = append(result, string(r))
		}
	}
	return result
}
// keywords returns a slice of searchable tokens extracted // from the given UserID packet keywords string. func keywords(key *openpgp.PrimaryKey) []string { m := make(map[string]bool) for _, uid := range key.UserIDs { s := strings.ToLower(uid.Keywords) lbr, rbr := strings.Index(s, "<"), strings.LastIndex(s, ">") if lbr != -1 && rbr > lbr { m[s[lbr+1:rbr]] = true } if lbr != -1 { fields := strings.FieldsFunc(s[:lbr], func(r rune) bool { if !utf8.ValidRune(r) { return true } if unicode.IsLetter(r) || unicode.IsNumber(r) { return false } return true }) for _, field := range fields { m[field] = true } } } var result []string for k := range m { result = append(result, k) } return result }