Example #1
func NormalizeTitle(title string) string {
	normalizedTitle := RomanizeHepburn(title)
	normalizedTitle = strings.ToLower(normalizedTitle)
	normalizedTitle = RemoveTrailingApostrophe(normalizedTitle)
	normalizedTitle, _, _ = transform.String(transform.Chain(
		norm.NFD,
		transform.RemoveFunc(func(r rune) bool {
			return unicode.Is(unicode.Mn, r)
		}),
		norm.NFC), normalizedTitle)
	normalizedTitle = strings.ToLower(normalizedTitle)
	normalizedTitle = regexp.MustCompile(`\(\d+\)`).ReplaceAllString(normalizedTitle, " ")
	normalizedTitle = strings.Map(func(r rune) rune {
		if !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != '.' {
			return ' '
		}
		return r
	}, normalizedTitle)
	normalizedTitle = regexp.MustCompile(`\s+`).ReplaceAllString(normalizedTitle, " ")
	normalizedTitle = strings.TrimSpace(normalizedTitle)

	return normalizedTitle
}
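One detail worth flagging in this example: both regexp.MustCompile calls recompile their pattern on every invocation. A minimal sketch of the usual fix, hoisting the compiled patterns to package level (variable names here are illustrative):

var (
	reParenNumber = regexp.MustCompile(`\(\d+\)`) // e.g. a year suffix such as "(2009)"
	reWhitespace  = regexp.MustCompile(`\s+`)
)

// Inside NormalizeTitle the two calls then become:
//   normalizedTitle = reParenNumber.ReplaceAllString(normalizedTitle, " ")
//   normalizedTitle = reWhitespace.ReplaceAllString(normalizedTitle, " ")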
Example #2
func init() {
	words = make(map[string]int)
	count = 0

	amz = "./amazon_cells_labelled.txt"
	ylp = "./yelp_labelled.txt"
	imd = "./imdb_labelled.txt"

	sanitize = transform.RemoveFunc(func(r rune) bool {
		switch {
		case r >= 'A' && r <= 'Z':
			return false
		case r >= 'a' && r <= 'z':
			return false
		case r >= '0' && r <= '1': // keeps only '0' and '1', presumably the sentiment labels in the *_labelled.txt files
			return false
		case r == ' ':
			return false
		case r == '\t':
			return false
		default:
			return true
		}
	})

	rand.Seed(42)
}
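For context, a transformer built this way is applied with transform.String (or transform.Bytes). A self-contained sketch using the same filter as above; the sample sentence is made up:

package main

import (
	"fmt"

	"golang.org/x/text/transform"
)

func main() {
	sanitize := transform.RemoveFunc(func(r rune) bool {
		switch {
		case r >= 'A' && r <= 'Z', r >= 'a' && r <= 'z':
			return false
		case r >= '0' && r <= '1', r == ' ', r == '\t':
			return false
		default:
			return true
		}
	})

	s, _, _ := transform.String(sanitize, "Great phone!!!\t1")
	fmt.Println(s) // prints "Great phone" and "1" separated by a tab
}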
Example #3
func main() {
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	r := transform.NewReader(os.Stdin, t)
	if _, err := io.Copy(os.Stdout, r); err != nil {
		log.Fatal(err)
	}
}
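This example relies on an isMn helper defined elsewhere in its file (Example #21 below shows the usual definition). A self-contained version of the same stdin-to-stdout accent stripper:

package main

import (
	"io"
	"log"
	"os"
	"unicode"

	"golang.org/x/text/transform"
	"golang.org/x/text/unicode/norm"
)

func isMn(r rune) bool {
	return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
}

func main() {
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	r := transform.NewReader(os.Stdin, t)
	if _, err := io.Copy(os.Stdout, r); err != nil {
		log.Fatal(err)
	}
}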
Example #4
// removeNlChars strips control characters and non-ASCII runes after NFKD decomposition.
func removeNlChars(str string) string {
	isOk := func(r rune) bool {
		return r < 32 || r >= 127
	}
	t := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk))
	str, _, _ = transform.String(t, str)
	return str
}
Example #5
func TestLettersShouldPass1(t *testing.T) {
	s, _, _ := transform.String(transform.RemoveFunc(OnlyLetters), "THIS iz A L337 aNd Un'Sani~~~~tized sentence")
	sanitized := []rune(s)

	for i := range sanitized {
		assert.False(t, OnlyLetters(sanitized[i]), "Letter %v should be sanitized", sanitized[i])
	}
}
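OnlyLetters itself comes from the package under test and is not shown on this page. Judging by how the tests use it, a plausible reconstruction (hypothetical, not the package's actual source) is:

// Hypothetical reconstruction: OnlyLetters reports true for every rune that
// is not a letter, so transform.RemoveFunc(OnlyLetters) keeps only letters.
func OnlyLetters(r rune) bool {
	return !unicode.IsLetter(r)
}

The sibling predicates used in Examples #10, #22 and #24 (OnlyAsciiLetters, OnlyWordsAndNumbers) presumably follow the same convention: return true for what should be removed.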
Example #6
// NewNaiveBayes returns a NaiveBayes model with the
// given number of classes instantiated, ready to
// learn from the given data stream. The sanitization
// function is set to the given function, which must
// have the func(rune) bool signature that
// transform.RemoveFunc accepts.
func NewNaiveBayes(stream <-chan base.TextDatapoint, classes uint8, sanitize func(rune) bool) *NaiveBayes {
	return &NaiveBayes{
		Words:         make(map[string]Word),
		Count:         make([]uint64, classes),
		Probabilities: make([]float64, classes),

		sanitize: transform.RemoveFunc(sanitize),
		stream:   stream,
	}
}
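Note that transform.RemoveFunc is a function, not an interface: the sanitize argument is any func(rune) bool that returns true for runes to drop. A self-contained sketch of such a predicate (the function name and sample input are made up):

package main

import (
	"fmt"
	"unicode"

	"golang.org/x/text/transform"
)

// stripNonText reports runes to remove: anything that is not a letter,
// digit, or plain space. (Illustrative; any func(rune) bool works.)
func stripNonText(r rune) bool {
	return !unicode.IsLetter(r) && !unicode.IsDigit(r) && r != ' '
}

func main() {
	s, _, _ := transform.String(transform.RemoveFunc(stripNonText), "it's 10/10!")
	fmt.Println(s) // its 1010
}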
Example #7
// RestoreWithFuncs takes raw JSON data of a model and
// restores a model from it. The tokenizer and sanitizer
// passed in will be assigned to the restored model.
func (b *NaiveBayes) RestoreWithFuncs(data io.Reader, sanitizer func(rune) bool, tokenizer Tokenizer) error {
	if b == nil {
		return errors.New("Cannot restore a model to a nil pointer")
	}
	err := json.NewDecoder(data).Decode(b)
	if err != nil {
		return err
	}
	b.sanitize = transform.RemoveFunc(sanitizer)
	b.Tokenizer = tokenizer
	return nil
}
Example #8
// stripCtlAndExtFromUnicode sanitizes input by stripping control codes and extended characters.
// From: http://rosettacode.org/wiki/Strip_control_codes_and_extended_characters_from_a_string#Go
func stripCtlAndExtFromUnicode(str string) string {
	isOk := func(r rune) bool {
		return r < 32 || r >= 127
	}
	// The isOk filter is such that there is no need to chain to norm.NFC
	t := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk))
	// This Transformer could also trivially be applied as an io.Reader
	// or io.Writer filter to automatically do such filtering when reading
	// or writing data anywhere.
	str, _, _ = transform.String(t, str)
	return str
}
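As the comment in the middle of that function says, the same chain also works as a stream filter. A minimal sketch of the io.Writer side (dst stands for any io.Writer you already have):

isOk := func(r rune) bool { return r < 32 || r >= 127 }
t := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk))
w := transform.NewWriter(dst, t)
defer w.Close() // Close flushes any bytes still buffered by the transformer
// Everything written to w reaches dst already filtered.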
Example #9
// NewNaiveBayes returns a NaiveBayes model with the
// given number of classes instantiated, ready to
// learn from the given data stream. The sanitization
// function is set to the given function, which must
// have the func(rune) bool signature that
// transform.RemoveFunc accepts.
func NewNaiveBayes(stream <-chan base.TextDatapoint, classes uint8, sanitize func(rune) bool) *NaiveBayes {
	return &NaiveBayes{
		Words:         concurrentMap{sync.RWMutex{}, make(map[string]Word)},
		Count:         make([]uint64, classes),
		Probabilities: make([]float64, classes),

		sanitize:  transform.RemoveFunc(sanitize),
		stream:    stream,
		Tokenizer: &SimpleTokenizer{SplitOn: " "},

		Output: os.Stdout,
	}
}
Example #10
func TestAsciiLetters(t *testing.T) {
	tests := []testCase{
		{"THIS iz A L337 aNd Un'Sani~~~~tized sentence", "THISizALaNdUnSanitizedsentence"},
		{"here're some unicode letters: --Æ.ÒÑ", "hereresomeunicodeletters"},
		{")(*&^%$@!@#$%^&*(*&^%$#@#$%", ""},
	}
	for _, test := range tests {
		s, _, _ := transform.String(transform.RemoveFunc(OnlyAsciiLetters), test.input)
		if s != test.expectedOutput {
			t.Errorf("got \"%s\" expected \"%s\"\n", s, test.expectedOutput)
		}
	}
}
Example #11
// GetCompatibleString removes all the special characters
// from the string name to create a new string compatible
// with different file names.
func GetCompatibleString(name string) string {
	// Replace all the & signs with and text
	name = strings.Replace(name, "&", "and", -1)
	// Change all the characters to ASCII
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	result, _, _ := transform.String(t, name)
	// Replace all the spaces with underscore
	s, _ := regexp.Compile(`\s+`)
	result = s.ReplaceAllString(result, "_")
	// Remove all the non alphanumeric characters
	r, _ := regexp.Compile(`\W`)
	result = r.ReplaceAllString(result, "")
	return result
}
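A worked input makes the pipeline concrete (assuming isMn as defined in Example #21):

fmt.Println(GetCompatibleString("Café & Crème brûlée"))
// Output: Cafe_and_Creme_brulee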
Example #12
func ExampleRemoveFunc() {
	input := []byte(`tschüß; до свидания`)

	b := make([]byte, len(input))

	t := transform.RemoveFunc(unicode.IsSpace)
	n, _, _ := t.Transform(b, input, true)
	fmt.Println(string(b[:n]))

	t = transform.RemoveFunc(func(r rune) bool {
		return !unicode.Is(unicode.Latin, r)
	})
	n, _, _ = t.Transform(b, input, true)
	fmt.Println(string(b[:n]))

	n, _, _ = t.Transform(b, norm.NFD.Bytes(input), true)
	fmt.Println(string(b[:n]))

	// Output:
	// tschüß;досвидания
	// tschüß
	// tschuß
}
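The third output line is the instructive one: norm.NFD first splits ü into u plus a combining diaeresis (U+0308); that combining mark is not in unicode.Latin, so the filter removes it, while ß is a Latin letter and survives. Hence tschuß rather than tschüß.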
Example #13
func normalize(name, src string) (string, error) {
	if name == "" {
		name = baseWithoutExt(src)
	}
	t := transform.Chain(norm.NFD, transform.RemoveFunc(remove), norm.NFC)
	name = strings.TrimSpace(name)
	name, _, err := transform.String(t, name)
	if err != nil {
		return "", err
	}
	name = strings.ToLower(name)
	name = strings.Replace(name, " ", "_", -1)
	return name, nil
}
Example #14
// normalize does unicode normalization.
func normalize(in []byte) ([]byte, error) {
	// Build the transformer fresh for each input: a Transformer carries
	// internal state and is not safe for concurrent reuse.
	filter := func(r rune) bool {
		return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks (to be removed)
	}
	transformer := transform.Chain(norm.NFD, transform.RemoveFunc(filter), norm.NFC)
	out, _, err := transform.Bytes(transformer, in)
	out = bytes.Map(func(r rune) rune {
		if unicode.IsPunct(r) { // Replace punctuations with spaces.
			return ' '
		}
		return unicode.ToLower(r) // Convert to lower case.
	}, out)
	return out, err
}
func replace(path string) {
	copy := []string{}
	r := `(<script(\s|\S)*?<\/script>)|(<style(\s|\S)*?<\/style>)|(<!--(\s|\S)*?-->)|(<\/?(\s|\S)*?>)|(nbsp;)|((?:\s)\s)|(png)|(jpeg)|(jpg)|(mpg)|(\\u0026)|(\n)|(\v)|(\r)|(\0)|(\t)|(n°)|(à)|(wbe)|(_)`
	regex, err := regexp.Compile(r)
	if err != nil {
		return // there was a problem with the regular expression.
	}
	c, _ := readLines(path)
	for _, v := range c {
		reg := regex.ReplaceAllString(v, " ")
		slug := utils.GenerateSlug(reg)
		regex1, _ := regexp.Compile(`((\-){1,})|(\b\w{1}\b)`)
		reg = regex1.ReplaceAllString(slug, " ")
		t := stripchars(reg, `?,.!/©*@#~()$+"'&}]|:;[{²`)
		s := strings.TrimSpace(t)
		// fmt.Println(s)

		normalize := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
		normStr1, _, _ := transform.String(normalize, s)
		// fmt.Println(normStr1)

		if len(v) > 0 {
			copy = append(copy, normStr1)
		}
	}

	// fmt.Println(cleaned, "\n")

	j := strings.Replace(strings.Join((copy), " "), " ", ",", -1)
	// fmt.Println(j)
	regex2, err := regexp.Compile(`((\,){2,})`)
	j1 := regex2.ReplaceAllString(j, ",")
	// fmt.Println(j1)
	j2 := strings.Split(j1, ",")

	cleaned := []string{}

	for _, value := range j2 {
		if !stringInSlice(value, cleaned) {
			cleaned = append(cleaned, value)
		}
	}
	createCsv(path, filenameCsv, strings.Join(cleaned, ","))
}
Example #16
// UnicodeSanitize sanitizes a string for use in Hugo URLs, allowing only
// a predefined set of special Unicode characters.
// If the RemovePathAccents configuration flag is enabled, Unicode accents
// are also removed.
func UnicodeSanitize(s string) string {
	source := []rune(s)
	target := make([]rune, 0, len(source))

	for _, r := range source {
		if unicode.IsLetter(r) || unicode.IsDigit(r) || unicode.IsMark(r) || r == '%' || r == '.' || r == '/' || r == '\\' || r == '_' || r == '-' || r == '#' || r == '+' {
			target = append(target, r)
		}
	}

	var result string

	if viper.GetBool("RemovePathAccents") {
		// remove accents - see https://blog.golang.org/normalization
		t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
		result, _, _ = transform.String(t, string(target))
	} else {
		result = string(target)
	}

	return result
}
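The "predefined set of special Unicode characters" is the literal list in the condition: %, ., /, \, _, -, # and +; everything else that is not a letter, digit, or mark is dropped, including spaces. For example (with RemovePathAccents disabled):

fmt.Println(UnicodeSanitize("post #1: 50% off?!"))
// Output: post#150%off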
Example #17
func cleanSalary(input string) string {
	cleaner := transform.Chain(norm.NFD,
		transform.RemoveFunc(func(r rune) bool {
			return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
		}),
		norm.NFC)
	output, _, _ := transform.String(cleaner, input)
	output = strings.ToLower(output)
	m := reSalarySep.FindStringSubmatchIndex(output)
	if m != nil {
		output = output[:m[0]+1] + " - " + output[m[1]-1:]
	}
	for {
		m := reSalarySplit.FindStringSubmatchIndex(output)
		if m == nil {
			break
		}
		_, e1 := m[2], m[3]
		s2, _ := m[4], m[5]
		output = output[:e1] + output[s2:]
	}
	return output
}
Example #18
func cleanName(name string) string {
	name = strings.Replace(name, "ß", "ss", -1)
	name = strings.Replace(name, "Σ", "e", -1)
	name = strings.Replace(name, "æ", "a", -1)
	name = strings.Replace(name, "&", "and", -1)
	name = strings.Replace(name, "$", "s", -1)
	for _, c := range removeChars {
		name = strings.Replace(name, c, "", -1)
	}
	for _, c := range spaceChars {
		name = strings.Replace(name, c, " ", -1)
	}

	name = badChanRegex.ReplaceAllString(name, "")
	name = strings.Join(strings.Fields(name), " ")
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	unicodeCleanedName, _, err := transform.String(t, name)

	if err == nil {
		name = unicodeCleanedName
	}

	return strings.Trim(name, ` "`)
}
Example #19
func Bytes(b []byte) ([]byte, error) {
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	res, _, err := transform.Bytes(t, b)
	return res, err
}
Example #20
func init() {
	stripT = transform.Chain(
		norm.NFD,
		transform.RemoveFunc(isMn),
		norm.NFC)
}
Example #21
type PathSlice []Path

// Swap implements sort.Interface (and index.Swapper).
func (p PathSlice) Swap(i, j int) { p[i], p[j] = p[j], p[i] }

// Less implements sort.Interface.
func (p PathSlice) Less(i, j int) bool { return p[i].Encode() < p[j].Encode() }

// Len implements sort.Interface.
func (p PathSlice) Len() int { return len(p) }

func isMn(r rune) bool {
	return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
}

var transformer = transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)

func removeNonAlphaNumeric(s string) string {
	in := []rune(s)
	res := make([]rune, len(in))
	i := 0
	for _, x := range s {
		if x == '-' {
			res[i] = ' '
			i++
			continue
		}
		if unicode.IsLetter(x) || unicode.IsDigit(x) || unicode.IsSpace(x) {
			res[i] = unicode.ToLower(x)
			i++
		}
	}
	// Reconstructed ending (the snippet is truncated at the source):
	// return the runes kept so far.
	return string(res[:i])
}
Example #22
func TestLettersShouldPass2(t *testing.T) {
	s, _, _ := transform.String(transform.RemoveFunc(OnlyLetters), "0876543212     3456789)(*&^    %$@!@#$%^&    *(*&^%$#@#$%")
	sanitized := []rune(s)

	assert.Equal(t, 0, len(sanitized), "Length of string should be 0")
}
Example #23
var mappedToNothing = transform.RemoveFunc(func(r rune) bool {
	// TODO: replace by a unicode.RangeTable
	switch r {
	case '\u00AD', '\u034F', '\u1806',
		'\u180B', '\u180C', '\u180D',
		'\u200B', '\u200C', '\u200D', '\u2060',
		'\uFE00', '\uFE01', '\uFE02', '\uFE03',
		'\uFE04', '\uFE05', '\uFE06', '\uFE07',
		'\uFE08', '\uFE09', '\uFE0A', '\uFE0B',
		'\uFE0C', '\uFE0D', '\uFE0E', '\uFE0F',
		'\uFEFF':
		return true
	default:
		return false
	}
})
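The TODO can be sketched directly: the same set expressed as a unicode.RangeTable, transcribed from the cases above (a sketch, not the project's actual follow-up):

var mappedToNothingSet = &unicode.RangeTable{
	R16: []unicode.Range16{
		{Lo: 0x00AD, Hi: 0x00AD, Stride: 1}, // SOFT HYPHEN
		{Lo: 0x034F, Hi: 0x034F, Stride: 1}, // COMBINING GRAPHEME JOINER
		{Lo: 0x1806, Hi: 0x1806, Stride: 1}, // MONGOLIAN TODO SOFT HYPHEN
		{Lo: 0x180B, Hi: 0x180D, Stride: 1}, // MONGOLIAN FREE VARIATION SELECTORS
		{Lo: 0x200B, Hi: 0x200D, Stride: 1}, // ZWSP, ZWNJ, ZWJ
		{Lo: 0x2060, Hi: 0x2060, Stride: 1}, // WORD JOINER
		{Lo: 0xFE00, Hi: 0xFE0F, Stride: 1}, // VARIATION SELECTORS
		{Lo: 0xFEFF, Hi: 0xFEFF, Stride: 1}, // ZERO WIDTH NO-BREAK SPACE
	},
}

var mappedToNothing = transform.RemoveFunc(func(r rune) bool {
	return unicode.Is(mappedToNothingSet, r)
})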
Example #24
func TestWordsAndNumbersShouldPass2(t *testing.T) {
	s, _, _ := transform.String(transform.RemoveFunc(OnlyWordsAndNumbers), ")(*&^%$@!@#$%^&*(*&^%$#@#$%")
	sanitized := []rune(s)

	assert.Equal(t, 0, len(sanitized), "Length of string should be 0")
}
Example #25
func removeDiacritics(s string) string {
	// Note: with no norm.NFD step in front, this only removes marks that are
	// already decomposed; precomposed runes such as 'é' pass through unchanged.
	output, _, _ := transform.String(transform.RemoveFunc(func(r rune) bool {
		return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
	}), s)
	return output
}
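To also catch precomposed characters, chain a decomposition step in front, as most of the other examples on this page do:

t := transform.Chain(norm.NFD, transform.RemoveFunc(func(r rune) bool {
	return unicode.Is(unicode.Mn, r)
}), norm.NFC)
output, _, _ := transform.String(t, s)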
Example #26
func normalizer() transform.Transformer {
	isMn := func(r rune) bool {
		return unicode.Is(unicode.Mn, r) // Mn: nonspacing marks
	}
	return transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
}
Example #27
// NeuterAccents transforms accented characters into their plain forms.
func NeuterAccents(s string) string {
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	result, _, _ := transform.String(t, s)

	return result
}
Example #28
func NewReader(r io.Reader) io.Reader {
	t := transform.Chain(norm.NFD, transform.RemoveFunc(isMn), norm.NFC)
	return transform.NewReader(r, t)
}
Example #29
// UpdateSanitize updates the NaiveBayes model's
// text sanitization transformation function
func (b *NaiveBayes) UpdateSanitize(sanitize func(rune) bool) {
	b.sanitize = transform.RemoveFunc(sanitize)
}
Example #30
func main() {
	flag.Parse()
	//	panic("Just Quit")
	getHostConfig()
	//	runtime.GOMAXPROCS(2)
	timeout = 1000
	fmt.Println("Feeds")
	//http://careers.stackoverflow.com/jobs/feed?searchTerm=big+data&location=san+francisco&range=100&distanceUnits=Miles
	//	feeds = append(feeds, Feed{index: 0, url: "http://careers.stackoverflow.com/jobs/feed?searchTerm=big+data&location=san+francisco&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false })

	feeds = append(feeds, Feed{index: 0, url: "http://careers.stackoverflow.com/jobs/feed?location=san+francisco%2c+ca&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 1, url: "http://careers.stackoverflow.com/jobs/feed?location=new+york+city%2c+ny&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 2, url: "http://careers.stackoverflow.com/jobs/feed?location=los+angeles%2c+ca&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 3, url: "http://careers.stackoverflow.com/jobs/feed?location=boston%2c+ma&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 4, url: "http://careers.stackoverflow.com/jobs/feed?location=seattle%2cwa&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 5, url: "http://careers.stackoverflow.com/jobs/feed?location=austin%2ctx&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	feeds = append(feeds, Feed{index: 6, url: "http://careers.stackoverflow.com/jobs/feed?location=chicago%2cil&range=100&distanceUnits=Miles", status: 0, itemCount: 0, complete: false, itemsComplete: false})
	mutex = &sync.Mutex{}
	skillMap = make(map[string]int, 200)
	loadSkillMapFile(skillMap)
	fmt.Println("GetRSS")
	getRSS2()
	saveSkillMapFile(skillMap)
	if conf.hbaseZkURL != "" {
		saveSkillsMapHBase(skillMap)
	}

	for i := 0; i < len(guidList); i++ {
		fmt.Println(guidList[i])
	}

	//	guidList := make([]string, 4)
	//	guidList[0] = "http://careers.stackoverflow.com/jobs/103310/senior-software-engineer-american-society-of-clinical"
	//	guidList[1] = "http://careers.stackoverflow.com/jobs/94152/senior-software-engineer-platform-flixster"
	//	guidList[2] = "http://careers.stackoverflow.com/jobs/103328/senior-full-stack-engineer-data-science-adroll"
	//	guidList[3] = "http://careers.stackoverflow.com/jobs/104086/enterprise-architect-new-relic"
	//	fmt.Printf("%v\n", s)

	// map random times & make s3names
	fw.Slice(guidList).Map(func(sURL string) URLTuple {
		fmt.Printf("Map1: %v\n", sURL)
		fName := "jobs_sof/" + strings.Replace(strings.TrimPrefix(sURL, "http://careers.stackoverflow.com/jobs/"), "/", "_", -1)
		ms := rand.Intn(3000)
		return URLTuple{sURL, fName, ms}
		//	Filter already-acquired URLs
	}).Filter(func(uTuple URLTuple) bool {
		// is file already stored in S3?
		//fmt.Printf("Filter:%s, %v\n", uTuple.s3Name, uTuple)
		svcS3 := s3.New(session.New(&aws.Config{Region: aws.String("us-east-1")}))
		var params *s3.HeadObjectInput

		params = &s3.HeadObjectInput{
			Bucket: aws.String("opps"),        // Required
			Key:    aws.String(uTuple.s3Name), // Required
		}
		hobj, _ := svcS3.HeadObject(params)

		fmt.Printf("Filter: %s => %v\n", uTuple.s3Name, hobj.ContentLength == nil)
		return hobj.ContentLength == nil
		//	get the URLs
	}).Map(func(uTuple URLTuple) statusTuple {
		fmt.Printf("Map3: %v\n", uTuple)
		// random sleep
		time.Sleep(time.Duration(uTuple.msWait) * time.Millisecond)

		// get URL
		resp, err := http.Get(uTuple.gURL)
		if err != nil {
			panic(err)
		}
		defer resp.Body.Close()

		//		fmt.Println("Body:", resp.Body)
		//		fmt.Println("Proto:", resp.Proto)
		//		fmt.Printf("response Status = <%s> / Length = %d\n", resp.Status, resp.ContentLength)
		//		fmt.Println("response Headers:", resp.Header)
		//		fmt.Printf("response %+v:\n", resp)
		//		fmt.Println("response Body:", string(body))
		failed := 0
		passed := 0
		if resp.StatusCode == 200 {
			passed = 1
		} else {
			failed = 1
		}
		// store in S3
		if passed == 1 {
			body, _ := ioutil.ReadAll(resp.Body)
			reader := strings.NewReader(string(body))
			root, err := html.Parse(reader)

			if err != nil {
				fmt.Printf("%+v\n", err)
			}

			var b bytes.Buffer
			html.Render(&b, root)
			fixedHtml := b.String()

			isOk := func(r rune) bool {
				return r < 32 || r >= 127
			}
			// The isOk filter is such that there is no need to chain to norm.NFC
			t2 := transform.Chain(norm.NFKD, transform.RemoveFunc(isOk))
			// This Transformer could also trivially be applied as an io.Reader
			// or io.Writer filter to automatically do such filtering when reading
			// or writing data anywhere.
			fixedUnicodeNFKD, _, _ := transform.String(t2, fixedHtml)

			//			fmt.Println("\n\n\n"+fixedUnicodeNFKD)
			reader = strings.NewReader(fixedUnicodeNFKD)

			xmlroot, xmlerr := xmlpath.ParseHTML(reader)
			if xmlerr != nil {
				log.Fatal(xmlerr)
			}
			//	fmt.Printf("xml root = %+v\n------\n", xmlroot)
			path := &xmlpath.Path{}
			pstr := string("")

			pstr = `/html/head/title`
			path = xmlpath.MustCompile(pstr)
			var ok bool

			title := ""
			if title, ok = path.String(xmlroot); ok {
				//		fmt.Printf("%s: %s\n", pstr, title)
			}
			fmt.Printf("**** Title: %s\n", title)
			var iter *xmlpath.Iter
			var list *xmlpath.Path
			var cnt int

			// Location - needs Trim
			pstr = `//*[@id="hed"]/ul[1]/li/text()`
			path = xmlpath.MustCompile(pstr)
			location := ""
			if location, ok = path.String(xmlroot); ok {
				//		fmt.Printf("Location - %s: %s\n", pstr, strings.Trim(location, " \n"))
				location = strings.Trim(location, " \n")
			}

			// Base Skills - LOOP from 1 until not ok
			var skills []string

			list = xmlpath.MustCompile(`//*[@id="hed"]/div[2]/p/a`)
			iter = list.Iter(xmlroot)
			for iter.Next() {
				ele := iter.Node().String()
				skills = append(skills, ele)
				//		fmt.Printf("Sk-Desc: %s\n", ele)
			}

			var desc []string
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[2]/p`)
			iter = list.Iter(xmlroot)
			for iter.Next() {
				ele := iter.Node().String()
				desc = append(desc, ele)
				//		fmt.Printf("it-Desc1: %s\n", ele)
			}

			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[2]/ul/li`)
			iter = list.Iter(xmlroot)
			for iter.Next() {
				ele := iter.Node().String()
				desc = append(desc, ele)
				//		fmt.Printf("it-Desc2: %s\n", ele)
			}

			var sSNR []string
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[3]/p`)
			iter = list.Iter(xmlroot)
			cnt = 0
			for iter.Next() {
				ele := iter.Node().String()
				sSNR = append(sSNR, ele)
				//		fmt.Printf("Skills1 (%d): %s\n", cnt, ele)
				cnt++
			}

			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[3]/ul/li/text()`)
			iter = list.Iter(xmlroot)
			cnt = 0
			for iter.Next() {
				ele := iter.Node().String()
				sSNR = append(sSNR, ele)
				//		fmt.Printf("Skills2(%d): %s\n", cnt, ele)
				cnt++
			}

			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[3]/ul/li/ul/li/text()`)
			iter = list.Iter(xmlroot)
			cnt = 0
			for iter.Next() {
				ele := iter.Node().String()
				sSNR = append(sSNR, ele)
				//		fmt.Printf("Skills3(%d): %s\n", cnt, ele)
				cnt++
			}
			//
			//    // about company -
			//	pstr = `//*[@id="jobdetailpage"]/div[2]/div[1]/div[4]/p/text()`
			//	//*[@id="jobdetailpage"]/div[2]/div[1]/div[2]/p[2]/text()[1]
			//	path = xmlpath.MustCompile(pstr)
			//	about := ""
			//	if about, ok = path.String(xmlroot); ok {
			//		fmt.Printf("About: %s - %s\n", pstr, about)
			//	}

			var about []string
			list = xmlpath.MustCompile(`//*[@id="jobdetailpage"]/div[2]/div[1]/div[4]/p`)
			//*[@id="jobdetailpage"]/div[2]/div[1]/div[4]/p[2]/text()[1]
			iter = list.Iter(xmlroot)
			cnt = 0
			for iter.Next() {
				ele := iter.Node().String()
				about = append(about, ele)
				//		fmt.Printf("About(%d): %s\n", cnt, ele)
				cnt++
			}

			var sep string

			baseAbout := "ABOUT: "
			sep = ""
			for i := 0; i < len(about); i++ {
				baseAbout += sep + about[i]
				sep = "\n"
			}

			baseSkills := "BASESKILLS: "
			sep = ""
			//	fmt.Printf("base skills = %+v\n", skills)
			for i := 0; i < len(skills); i++ {
				baseSkills += sep + skills[i]
				sep = " "
			}

			baseReqs := "REQUIREMENTS: "
			sep = ""
			for i := 0; i < len(sSNR); i++ {
				baseReqs += sep + sSNR[i]
				sep = "\n"
			}

			baseDesc := "DESCRIPTION: "
			sep = ""
			for i := 0; i < len(desc); i++ {
				baseDesc += sep + desc[i]
				sep = "\n"
			}

			storage := uTuple.gURL + "\n\n" +
					"DATE: " + time.Now().Format(time.RFC850) + "\n\n" +
					"TITLE: " + html.UnescapeString(title) + "\n\n" +
					"LOCATION: " + html.UnescapeString(location) + "\n\n" +
					html.UnescapeString(baseSkills) + "\n\n" +
					html.UnescapeString(baseAbout) + "\n\n" +
					html.UnescapeString(baseDesc) + "\n\n" + // no second slash
					html.UnescapeString(baseReqs) + "\n"

			fmt.Printf("Storing (len = %d):\n***\n%s\n***\n", len(storage), storage)

			svcS3 := s3.New(session.New(&aws.Config{Region: aws.String("us-east-1")}))
			bucket := "opps"
			key := uTuple.s3Name
			_, err = svcS3.PutObject(&s3.PutObjectInput{
				Body:   strings.NewReader(storage),
				Bucket: &bucket,
				Key:    &key,
			})
			if err != nil {
				fmt.Printf("Failed to upload data to %s/%s, %s\n", bucket, key, err)
				failed = 1
				passed = 0
			}
		}
		//		return statusTuple{passed, failed}
		return statusTuple{passed, failed}
		// count URLs
	}).Reduce(func(x statusTuple, y statusTuple) statusTuple {
		fmt.Printf("Red1: x= %v, y = %v\n", x, y)
		return statusTuple{x.pass + y.pass, x.fail + y.fail}
	}).Map(func(x statusTuple) {
		fmt.Printf("Map4 Result: passed = %d, failed = %d\n", x.pass, x.fail)
	}).Run()

}