Exemple #1
0
// main builds the sharded search index for an unpacked source tree.
//
// It walks *unpackedPath, skips directories and files that are not worth
// indexing (VCS/quilt metadata, documentation, generated autotools files, ...)
// and distributes the remaining regular files round-robin over *numShards
// index files named index.<n>.idx below *indexShardPath. Files are stored in
// the index under their path relative to *unpackedPath. When *dry is set, no
// index is created and the decisions are only logged.
func main() {
	flag.Parse()
	fmt.Println("Debian Code Search indexing tool")

	var ix []*index.IndexWriter
	if !*dry {
		// Fewer than one shard would make the allocation below panic
		// (negative) or the round-robin modulo divide by zero.
		if *numShards < 1 {
			log.Fatalf("Invalid number of shards: %d\n", *numShards)
		}
		ix = make([]*index.IndexWriter, *numShards)
		for i := range ix {
			shardPath := fmt.Sprintf("%s/index.%d.idx", *indexShardPath, i)
			ix[i] = index.Create(shardPath)
			ix[i].Verbose = true
		}
	}

	// Documentation and configuration file suffixes which are not indexed.
	skippedSuffixes := []string{
		".conf",
		".dic", // spell checking dictionaries
		".cfg",
		".man",
		".xml",
		".xsl",
		".html",
		".sgml",
		".pod",
		".po",
		".txt",
		".tex",
		".rtf",
		".docbook",
		".symbols",
	}

	// skipFile reports whether the entry called filename inside dir (dir as
	// returned by filepath.Split, i.e. including the trailing slash) is
	// excluded from the index.
	//
	// NB: we don't skip "configure" since that might be a custom shell-script.
	// NB: we actually skip some autotools files because they blow up our index
	// otherwise.
	// TODO: peek inside the files (we'd have to read them anyways) and check
	// for messages that indicate that the file is generated, either by
	// autoconf or by bison for example.
	skipFile := func(dir, filename string) bool {
		switch filename {
		case "NEWS", "COPYING", "LICENSE", "CHANGES",
			"Makefile.in", "ltmain.sh", "config.guess", "config.sub",
			"depcomp", "aclocal.m4", "libtool.m4", ".gitignore":
			return true
		}
		for _, suffix := range skippedSuffixes {
			if strings.HasSuffix(filename, suffix) {
				return true
			}
		}
		// Don't match /debian/changelog or /debian/README, but exclude
		// changelog and readme files generally. The two prefix checks are
		// grouped explicitly: && binds tighter than ||, so without the
		// grouping readme files would be skipped inside debian/ as well.
		if !strings.HasSuffix(dir, "/debian/") {
			lower := strings.ToLower(filename)
			if strings.HasPrefix(lower, "changelog") ||
				strings.HasPrefix(lower, "readme") {
				return true
			}
		}
		return hasManpageSuffix(filename)
	}

	// Walk through all the directories and add files matching our source file
	// criteria to the index.
	cnt := 0
	walkFn := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			log.Printf("Could not walk %q: %v\n", path, err)
		}
		// filepath.Walk passes a nil info when it could not stat the entry;
		// bail out before anything dereferences it.
		if info == nil {
			return nil
		}

		if dir, filename := filepath.Split(path); filename != "" {
			// Skip quilt's .pc directories, "po" directories (localization)
			// and git metadata.
			if info.IsDir() &&
				(filename == ".pc" || filename == "po" || filename == ".git") {
				return filepath.SkipDir
			}

			if skipFile(dir, filename) {
				if *dry {
					log.Printf("skipping %s\n", filename)
				}
				return nil
			}
		}

		if !info.Mode().IsRegular() {
			return nil
		}

		// Some filenames (e.g.
		// "xblast-tnt-levels_20050106-2/reconstruct\xeeon2.xal") contain
		// invalid UTF-8 and will break when sending them via JSON later
		// on. Filter those out early to avoid breakage.
		if !utf8.ValidString(path) {
			log.Printf("Skipping due to invalid UTF-8: %s\n", path)
			return nil
		}

		// Strip the unpacked directory path plus the following slash, e.g.
		// /dcs-ssd/unpacked plus /. filepath.Rel is used instead of slicing
		// by a precomputed length so that an empty, relative or non-clean
		// *unpackedPath cannot lead to an out-of-range slice.
		indexname, err := filepath.Rel(*unpackedPath, path)
		if err != nil {
			log.Printf("Skipping %q: %v\n", path, err)
			return nil
		}
		if *dry {
			log.Printf("adding %s as %s\n", path, indexname)
			return nil
		}
		ix[cnt%*numShards].AddFile(path, indexname)
		cnt++
		return nil
	}
	if err := filepath.Walk(*unpackedPath, walkFn); err != nil {
		log.Fatalf("Could not walk %q: %v\n", *unpackedPath, err)
	}

	if !*dry {
		for _, shard := range ix {
			shard.Flush()
		}
	}
	os.Exit(0)
}
Exemple #2
0
// indexPackage indexes the unpacked sources of pkg and copies every indexed
// file into the permanent unpacked directory.
//
// The sources are read from tmpdir/pkg/pkg. Entries for which ignored()
// returns a non-nil reason are deleted from the temporary tree. Every other
// regular file is added to a fresh index and, if that succeeds, copied to
// *unpackedPath under its path relative to tmpdir/pkg; files that cannot be
// indexed are deleted instead. The index is written to *unpackedPath/pkg.tmp
// and renamed to pkg.idx once it is complete. File system errors that prevent
// this terminate the process via log.Fatalf.
func indexPackage(pkg string) {
	log.Printf("Indexing %s\n", pkg)
	unpacked := filepath.Join(tmpdir, pkg, pkg)
	if err := os.MkdirAll(*unpackedPath, os.FileMode(0755)); err != nil {
		log.Fatalf("Could not create directory: %v\n", err)
	}

	// Write to a temporary file first so that merges can happen at the same
	// time. If we don't do that, merges will try to use incomplete index
	// files, which are interpreted as corrupted.
	tmpIndexPath := filepath.Join(*unpackedPath, pkg+".tmp")
	// Named ix so that the variable does not shadow the index package.
	ix := index.Create(tmpIndexPath)
	// +1 because of the / that should not be included in the index.
	stripLen := len(filepath.Join(tmpdir, pkg)) + 1

	walkFn := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			log.Printf("Could not walk %q: %v\n", path, err)
		}
		// filepath.Walk passes a nil info when it could not stat the entry;
		// bail out before info is handed to ignored() or dereferenced.
		if info == nil {
			return nil
		}

		if dir, filename := filepath.Split(path); filename != "" {
			skip := ignored(info, dir, filename)
			if *debugSkip && skip != nil {
				log.Printf("Skipping %q: %v", path, skip)
			}
			if skip != nil {
				if info.IsDir() {
					if err := os.RemoveAll(path); err != nil {
						log.Fatalf("Could not remove directory %q: %v\n", path, err)
					}
					return filepath.SkipDir
				}
				if err := os.Remove(path); err != nil {
					log.Fatalf("Could not remove file %q: %v\n", path, err)
				}
				return nil
			}
		}

		if !info.Mode().IsRegular() {
			return nil
		}

		// Some filenames (e.g.
		// "xblast-tnt-levels_20050106-2/reconstruct\xeeon2.xal") contain
		// invalid UTF-8 and will break when sending them via JSON later
		// on. Filter those out early to avoid breakage.
		if !utf8.ValidString(path) {
			log.Printf("Skipping due to invalid UTF-8: %s\n", path)
			return nil
		}

		relPath := path[stripLen:]
		if err := ix.AddFile(path, relPath); err != nil {
			log.Printf("Could not index %q: %v\n", path, err)
			if err := os.Remove(path); err != nil {
				log.Fatalf("Could not remove file %q: %v\n", path, err)
			}
			return nil
		}

		// Copy this file out of /tmp to our unpacked directory.
		outputPath := filepath.Join(*unpackedPath, relPath)
		if err := os.MkdirAll(filepath.Dir(outputPath), os.FileMode(0755)); err != nil {
			log.Fatalf("Could not create directory: %v\n", err)
		}
		output, err := os.Create(outputPath)
		if err != nil {
			log.Fatalf("Could not create output file %q: %v\n", outputPath, err)
		}
		input, err := os.Open(path)
		if err != nil {
			log.Fatalf("Could not open input file %q: %v\n", path, err)
		}
		defer input.Close()
		if _, err := io.Copy(output, input); err != nil {
			log.Fatalf("Could not copy %q to %q: %v\n", path, outputPath, err)
		}
		// Close the destination explicitly instead of deferring it: a
		// deferred Close would discard its error, and a failure reported at
		// close time means the copy is not safely on disk.
		if err := output.Close(); err != nil {
			log.Fatalf("Could not close output file %q: %v\n", outputPath, err)
		}
		return nil
	}
	if err := filepath.Walk(unpacked, walkFn); err != nil {
		log.Fatalf("Could not walk %q: %v\n", unpacked, err)
	}

	ix.Flush()

	finalIndexPath := filepath.Join(*unpackedPath, pkg+".idx")
	if err := os.Rename(tmpIndexPath, finalIndexPath); err != nil {
		log.Fatal(err)
	}
	successfulPackageIndexes.Inc()
}
Exemple #3
0
// indexPackage indexes the unpacked sources of pkg and copies every indexed
// file into the permanent unpacked directory.
//
// The sources are read from tmpdir/pkg/pkg. Entries for which ignored()
// reports true are deleted from the temporary tree. Every other regular file
// is added to the index at *unpackedPath/pkg.idx and, if that succeeds, copied
// to *unpackedPath under its path relative to tmpdir/pkg; files that cannot be
// indexed are deleted instead. File system errors that prevent this terminate
// the process via log.Fatalf.
func indexPackage(pkg string) {
	unpacked := filepath.Join(tmpdir, pkg, pkg)
	// Named ix so that the variable does not shadow the index package.
	ix := index.Create(filepath.Join(*unpackedPath, pkg+".idx"))
	// +1 so that the path separator following tmpdir/pkg is stripped as well
	// and names stored in the index do not start with a slash.
	stripLen := len(filepath.Join(tmpdir, pkg)) + 1

	walkFn := func(path string, info os.FileInfo, err error) error {
		if err != nil {
			log.Printf("Could not walk %q: %v\n", path, err)
		}
		// filepath.Walk passes a nil info when it could not stat the entry;
		// bail out before info is handed to ignored() or dereferenced.
		if info == nil {
			return nil
		}

		if dir, filename := filepath.Split(path); filename != "" {
			if ignored(info, dir, filename) {
				if info.IsDir() {
					if err := os.RemoveAll(path); err != nil {
						log.Fatalf("Could not remove directory %q: %v\n", path, err)
					}
					return filepath.SkipDir
				}
				if err := os.Remove(path); err != nil {
					log.Fatalf("Could not remove file %q: %v\n", path, err)
				}
				return nil
			}
		}

		if !info.Mode().IsRegular() {
			return nil
		}

		// Some filenames (e.g.
		// "xblast-tnt-levels_20050106-2/reconstruct\xeeon2.xal") contain
		// invalid UTF-8 and will break when sending them via JSON later
		// on. Filter those out early to avoid breakage.
		if !utf8.ValidString(path) {
			log.Printf("Skipping due to invalid UTF-8: %s\n", path)
			return nil
		}

		relPath := path[stripLen:]
		if err := ix.AddFile(path, relPath); err != nil {
			// Record why the file is dropped before deleting it.
			log.Printf("Could not index %q: %v\n", path, err)
			if err := os.Remove(path); err != nil {
				log.Fatalf("Could not remove file %q: %v\n", path, err)
			}
			return nil
		}

		// Copy this file out of /tmp to our unpacked directory.
		outputPath := filepath.Join(*unpackedPath, relPath)
		if err := os.MkdirAll(filepath.Dir(outputPath), os.FileMode(0755)); err != nil {
			log.Fatalf("Could not create directory: %v\n", err)
		}
		output, err := os.Create(outputPath)
		if err != nil {
			log.Fatalf("Could not create output file %q: %v\n", outputPath, err)
		}
		input, err := os.Open(path)
		if err != nil {
			log.Fatalf("Could not open input file %q: %v\n", path, err)
		}
		defer input.Close()
		if _, err := io.Copy(output, input); err != nil {
			log.Fatalf("Could not copy %q to %q: %v\n", path, outputPath, err)
		}
		// Close the destination explicitly instead of deferring it: a
		// deferred Close would discard its error, and a failure reported at
		// close time means the copy is not safely on disk.
		if err := output.Close(); err != nil {
			log.Fatalf("Could not close output file %q: %v\n", outputPath, err)
		}
		return nil
	}
	if err := filepath.Walk(unpacked, walkFn); err != nil {
		log.Fatalf("Could not walk %q: %v\n", unpacked, err)
	}

	ix.Flush()
}