// readAllFiles starts max(1, numReaders) goroutines, each running readFiles
// with textFileCh, fileCh and a copy of ctrl whose WaitGroup is private to
// this call. It blocks until that private WaitGroup drains (presumably each
// readFiles calls Done on it when finished -- confirm in readFiles), then
// closes fileCh, and finally signals the caller's ctrl.wg.
func readAllFiles(numReaders int, textFileCh chan string, fileCh chan *dm.File, ctrl GoRtnCntrl) {
	defer ctrl.wg.Done()

	// The readers get their own WaitGroup so that we can tell when all of
	// them, and only them, are done before closing fileCh.
	var readers sync.WaitGroup
	readerCtrl := ctrl
	readerCtrl.wg = &readers

	readerCount := dm.MaxInt(1, numReaders)
	readers.Add(readerCount)
	for remaining := readerCount; remaining > 0; remaining-- {
		go readFiles(textFileCh, fileCh, readerCtrl)
	}

	readers.Wait()
	close(fileCh)
}
// expandArgsToTextFiles runs a two-stage pipeline: one expandArgs goroutine
// that is given args and candidateCh, and max(1, numFilters) filterCandidates
// goroutines that are given candidateCh, textFileCh and ftiMgr. All of them
// share a copy of ctrl whose WaitGroup is private to this call. Once that
// WaitGroup drains, textFileCh is closed and the caller's ctrl.wg is
// signalled.
func expandArgsToTextFiles(
	args []string, textFileCh chan string, numFilters int,
	ftiMgr *FileTypeInfoManager, ctrl GoRtnCntrl) {
	defer ctrl.wg.Done()

	// Track the pipeline's goroutines separately from the caller's group so
	// that textFileCh is closed only after every one of them has finished.
	var stages sync.WaitGroup
	stageCtrl := ctrl
	stageCtrl.wg = &stages

	// Intermediate channel linking the expander to the filters.
	candidateCh := make(chan string, runtime.NumCPU())

	filterCount := dm.MaxInt(1, numFilters)
	stages.Add(1 + filterCount)

	go expandArgs(args, candidateCh, stageCtrl)
	for remaining := filterCount; remaining > 0; remaining-- {
		go filterCandidates(candidateCh, textFileCh, ftiMgr, stageCtrl)
	}

	stages.Wait()
	close(textFileCh)
}
// MakeFileTypeInfoManager creates a FileTypeInfoManager and starts the
// goroutines that service it: max(1, runtime.NumCPU()) content checkers plus
// one dispatcher. Every goroutine is registered with ctrl.wg and returns once
// a receive from ctrl.stopCh succeeds; the dispatcher also returns when
// p.requestCh is closed.
//
// The dispatcher answers a request from its extension alone when it can
// (knownExtensions, accumulated per-extension evidence, or
// fileExtImpliesText). Otherwise it forwards the request to a content checker,
// which calls fileContainsText, replies on the request's respCh, and reports
// the result back as evidence for later guesses. When the dispatcher returns,
// the evidence maps are stored in p.textTypes and p.nonTextTypes (before its
// ctrl.wg.Done runs).
func MakeFileTypeInfoManager(ctrl GoRtnCntrl) *FileTypeInfoManager {
	p := &FileTypeInfoManager{
		requestCh: make(chan *FileTypeInfoRequest, runtime.NumCPU()),
		ctrl:      ctrl,
	}

	stopCh := ctrl.stopCh

	// sendResponse delivers the answer for req on req.respCh and, when
	// updateCh is non-nil, also reports the (extension, isText) observation on
	// updateCh. The sends happen in whichever order becomes possible first;
	// each channel variable is set to nil once its send has completed, which
	// disables that select case. It returns early, possibly without having
	// sent everything, if stopCh fires.
	sendResponse := func(req *FileTypeInfoRequest, isText bool, updateCh chan *FileTypeInfoUpdate) {
		resp := &FileTypeInfoResponse{
			fp:     req.fp,
			isText: isText,
		}
		var update *FileTypeInfoUpdate
		if updateCh != nil {
			update = &FileTypeInfoUpdate{
				ext:    req.ext,
				isText: isText,
			}
		}
		respCh := req.respCh
		for !(respCh == nil && updateCh == nil) {
			// Do whichever is first possible.
			select {
			case <-stopCh:
				return
			case respCh <- resp:
				respCh = nil
			case updateCh <- update:
				updateCh = nil
			}
		}
	}

	// Create some go routines that will handle actually reading files
	// to determine if they are text or not.
	checkFileContentCh := make(chan *FileTypeInfoRequest, runtime.NumCPU())
	updateCh := make(chan *FileTypeInfoUpdate, runtime.NumCPU())
	checkFileContentGR := func() {
		defer ctrl.wg.Done()
		for {
			select {
			case <-stopCh:
				return
			case req, ok := <-checkFileContentCh:
				if !ok {
					// No more requests.
					// NOTE(review): nothing in this function closes
					// checkFileContentCh, so as written the checkers only
					// stop via stopCh.
					return
				}
				isText := fileContainsText(req.fp)
				sendResponse(req, isText, updateCh)
			}
		}
	}

	numGoRoutines := dm.MaxInt(1, runtime.NumCPU())
	for ; numGoRoutines > 0; numGoRoutines-- {
		ctrl.wg.Add(1)
		go checkFileContentGR()
	}

	// And create a go routine that will handle requests for file type.
	ctrl.wg.Add(1)
	go func() {
		defer ctrl.wg.Done()
		// Per-extension counts of files whose content was found to be text or
		// non-text. Only this goroutine touches the maps while it runs.
		textTypes := make(map[string]int)
		nonTextTypes := make(map[string]int)
		defer func() {
			p.textTypes = textTypes
			p.nonTextTypes = nonTextTypes
		}()
		// recordUpdate folds one content-check result into the evidence maps.
		recordUpdate := func(update *FileTypeInfoUpdate) {
			if update.isText {
				textTypes[update.ext]++
			} else {
				nonTextTypes[update.ext]++
			}
		}
		// guesstimateType decides from the extension alone whether a file is
		// text. isUnknown is true when the extension gives no usable answer
		// and the file content must be inspected instead.
		guesstimateType := func(ext string) (isText, isUnknown bool) {
			if ext == "" {
				isUnknown = true
				return
			}
			if v, ok := knownExtensions[ext]; ok {
				isText = v
				return
			}
			// Take a guess based on past files.
			ntt, tt := nonTextTypes[ext], textTypes[ext]
			if (ntt == 0 && tt > 10) || (tt-ntt > 20) {
				// Probably text.
				glog.V(1).Infof("Past evidence (%d, %d) indicates a text extension: %s", ntt, tt, ext)
				isText = true
				return
			} else if (tt == 0 && ntt > 10) || (ntt-tt > 20) {
				// Probably binary.
				glog.V(1).Infof("Past evidence (%d, %d) indicates a binary extension: %s", ntt, tt, ext)
				return
			}
			isText, isUnknown = fileExtImpliesText(ext)
			return
		}
		for {
			select {
			case <-stopCh:
				return

			case update := <-updateCh:
				recordUpdate(update)

			case req, ok := <-p.requestCh:
				if !ok {
					return
				}
				isText, isUnknown := guesstimateType(req.ext)
				if isUnknown {
					// Ask another go routine to read the file in order to figure out
					// the answer. That routine will take care of providing the answer
					// to the requester.
					//
					// Keep draining updateCh while waiting for room in
					// checkFileContentCh: the checkers block in sendResponse
					// when updateCh is full, and if this goroutine stopped
					// reading updates while they were all blocked there,
					// nobody would be left to receive the forwarded request.
					forwarded := false
					for !forwarded {
						select {
						case <-stopCh:
							return
						case update := <-updateCh:
							recordUpdate(update)
						case checkFileContentCh <- req:
							forwarded = true
						}
					}
				} else {
					sendResponse(req, isText, nil)
				}
			}
		}
	}()
	return p
}
Exemple #4
0
// makeLineInstance builds the lineInstance record for one line.
//
// file, start and lineNumber are stored as given; the line's end offset is
// start plus len(lineBytes). The "content" of the line is lineBytes with
// leading spaces/tabs and trailing whitespace (space, \n, \r, \t, \f, \v)
// removed; both the full line and the content are hashed with
// theLineHasher.Compute2.
//
// leadingTabs and leadingSpaces hold the number of tabs at the start of the
// line and the number of spaces immediately following them, each capped at
// 255. If further spaces or tabs follow that tabs-then-spaces prefix, both
// fields are set to 255 to mark the indentation as not well-formed.
//
// The line is typed wellKnownLine when its content is empty or
// dm.ComputeIsProbablyCommon reports true for it, and normalLine otherwise.
func makeLineInstance(file File, start FileOffset, lineNumber LineNo,
	lineBytes []byte) *lineInstance {
	length := len(lineBytes)
	// There are around 20 space characters in Unicode, but we're only handling
	// ASCII tab and space characters here.
	n := 0
	for n < length && lineBytes[n] == '\t' {
		n++
	}
	tabCount := n
	for n < length && lineBytes[n] == ' ' {
		n++
	}
	spaceCount := n - tabCount
	// Content starts after leading whitespace, any mixture of spaces and tabs.
	for n < length && (lineBytes[n] == ' ' || lineBytes[n] == '\t') {
		n++
	}
	contentIndex := n
	if contentIndex > tabCount+spaceCount {
		// Line doesn't start with "well-formed" indentation, so we can't as readily
		// compare indentations. So, mark this line such that we can detect this.
		tabCount = 255
		spaceCount = 255
	}
	// Search from the end for the first non-whitespace character. When the
	// loop finishes, n is the index of the last content byte, or
	// contentIndex-1 if there is no content at all.
	n = length - 1
TrailingLoop:
	for ; n >= contentIndex; n-- {
		switch lineBytes[n] {
		case ' ', '\n', '\r', '\t', '\f', '\v':
			// Whitespace.
			continue
		default:
			break TrailingLoop
		}
	}
	// The content ends one past the last content byte. Clamp to contentIndex
	// so that a line without content yields an empty slice; adding 1 after
	// the clamp instead would pull in one trailing whitespace byte, or index
	// past the end of lineBytes for an empty or all-space/tab line.
	contentEndIndex := dm.MaxInt(contentIndex, n+1)
	contentBytes := lineBytes[contentIndex:contentEndIndex]
	contentLength := len(contentBytes)
	lineHash, contentHash := theLineHasher.Compute2(lineBytes, contentBytes)

	p := &lineInstance{
		file:          file,
		lineNumber:    lineNumber,
		start:         start,
		end:           start + FileOffset(length),
		contentStart:  start + FileOffset(contentIndex),
		contentLength: FileOffset(contentLength),
		hash:          HashType(lineHash),
		contentHash:   HashType(contentHash),
		leadingTabs:   uint8(dm.MinInt(255, tabCount)),
		leadingSpaces: uint8(dm.MinInt(255, spaceCount)),
		lineType:      normalLine,
	}

	if contentLength == 0 || dm.ComputeIsProbablyCommon(contentBytes) {
		p.lineType = wellKnownLine
	}

	return p
}