//refine the candidate set based on the questions and answers provided func refine(answers *rx.BitSet, qns []string, cand []*rx.DFA, expns []*rx.RegExParsed, dex []int) ([]*rx.DFA, []*rx.RegExParsed, []int) { remain := make([]*rx.DFA, 0, len(cand)) express := make([]*rx.RegExParsed, 0, len(expns)) index := make([]int, 0, len(expns)) //iterate through all candidates for i := 0; i < len(cand); i++ { test := new(rx.BitSet) //initialize new bitset for j := 0; j < len(qns); j++ { //ask all the questions if cand[i].Accepts(qns[j]) != nil { test.Set(j) //record answers } } if test.Equals(answers) { //if test matches answers, keep this DFA remain = append(remain, cand[i]) express = append(express, expns[i]) if len(dex) == 0 { index = append(index, i) } else { index = append(index, dex[i]) if len(cand) < 20 { fmt.Printf("%d ", dex[i]) } } } } fmt.Printf("\n") return remain, express, index }
//find the expression that corresponds to the question word func findExpr(nextQn *rx.BitSet, e []*RegEx) string { b := new(rx.BitSet) for i := 0; i < len(e); i++ { b.Set(e[i].Index) if b.Equals(nextQn) { return e[i].Rexpr } b.Clear(e[i].Index) } return "" }
//determines best question by the number of candidates that accept the question word. func askBulk2(answers *rx.BitSet, qns []string, perm []int, cand []*rx.DFA, expns []*rx.RegExParsed, numPerGrp int) (*rx.BitSet, []string) { numGroups := math.Floor(float64(len(cand) / numPerGrp)) word := "" if len(cand) < numPerGrp { numPerGrp = len(cand) } fmt.Println("length cand = ", len(cand)) subjlist := make([]int, 0, *group) subjtrees := make([]rx.Node, 0, *group) expressions := make([]*RegEx, 0, len(cand)) numKeep := make([]int, 0, int(numGroups)) overlap := make([]*rx.BitSet, 0, int(numGroups)) it := 0 subjlist = make([]int, 0, *group) subjtrees = make([]rx.Node, 0, *group) expressions = make([]*RegEx, 0, len(cand)) //create the grouping for j := it * int(numPerGrp); j < (it+1)*numPerGrp; j++ { subjlist = append(subjlist, perm[j]) subjtrees = append(subjtrees, cand[perm[j]].Tree) expressions = append(expressions, &RegEx{perm[j], expns[perm[j]].Expr}) fmt.Printf("rx { %d } %s \n", perm[j], expns[perm[j]].Expr) } //construct multi DFA fmt.Printf("Constructing multiDFA\n") dfa := rx.MultiDFA(subjtrees) //dfa = dfa.Minimize() h := dfa.Synthesize() for _, ex := range h { fmt.Printf("eg %s %s\n", ex.RXset, ex.Example) } u := make([]*rx.BitSet, 0, len(h)) for i := 0; i < len(h); i++ { u = append(u, h[i].RXset) } //candidates alive := new(rx.BitSet) for i := 0; i < len(expressions); i++ { alive.Set(expressions[i].Index) } total := 0 max := 0 diff := float64(len(cand)) half := math.Floor(float64(len(cand) / 2)) maxWord := "" over := new(rx.BitSet) prevWord := make([]string, 0, len(cand)) //iterate through all example words generated for j := 0; j < len(h); j++ { total = 0 over = new(rx.BitSet) //tracks which candidates accept the question word //iterate through all candidates for i := 0; i < len(cand); i++ { //if the candidate accepts the example word, increment total and update bitset if cand[i].Accepts(h[j].Example) != nil { total++ over.Set(i) } } //order question words according to the number of candidates that accept them if math.Abs(float64(total)-half) < float64(diff) { diff = math.Abs(float64(total) - half) max = total maxWord = h[j].Example prevWord = append(prevWord, maxWord) overlap = append(overlap, over) } } fmt.Println("max = ", max, " max word: ", maxWord) qns = append(qns, maxWord) numKeep = append(numKeep, int(max)) fmt.Println("alive =", len(cand), ", max eliminate = ", max, ", maxWord =", maxWord) fmt.Printf("\n") answers = new(rx.BitSet) maybe := 0 for i := 0; i < len(qns); i++ { //if answer is maybe, ask the next question in the list begin: if maybe > 0 { if len(prevWord) == 1 { qns[i] = h[maybe].Example fmt.Println("Does your language accept this word: ", h[maybe].Example) } else { qns[i] = prevWord[len(prevWord)-maybe-1] fmt.Printf("Does your language accept this word: %s, max over = %s\n", prevWord[len(prevWord)-maybe-1], overlap[len(overlap)-maybe-1].String()) } } else { fmt.Println("Does your language accept this word: ", qns[i]) } var input string _, e1 := fmt.Scan(&input) if e1 != nil { fmt.Printf("err = %#v\n", e1) os.Exit(2) } if input == "Yes" || input == "y" || input == "Y" || input == "yes" { answers.Set(i) } else if input == "No" || input == "N" || input == "n" || input == "no" { } else if input == "maybe" || input == "m" || input == "M" { maybe++ goto begin } else { fmt.Println("That is not a valid expression, please try again.") fmt.Println("Does your language accept this word: ", word) goto begin } } fmt.Println("answers = ", answers.String()) for i := 0; i < len(qns); i++ { fmt.Println(qns[i]) } return answers, qns }
//used when candidate set partitioned. Asks the best question from each partition and records answers func askBulk(answers *rx.BitSet, qns []string, perm []int, cand []*rx.DFA, expns []*rx.RegExParsed, numPerGrp int) (*rx.BitSet, []string) { numGroups := math.Floor(float64(len(cand) / numPerGrp)) fmt.Println("num groups = ", numGroups) word := "" //modTest := math.Mod(float64(len(cand)), numGroups) //fmt.Println("testMod = ", modTest) subjlist := make([]int, 0, *group) subjtrees := make([]rx.Node, 0, *group) expressions := make([]*RegEx, 0, len(cand)) for it := 0; it < int(numGroups/2); it++ { subjlist = make([]int, 0, *group) subjtrees = make([]rx.Node, 0, *group) expressions = make([]*RegEx, 0, len(cand)) //create the grouping for j := it * int(numPerGrp); j < (it+1)*numPerGrp; j++ { subjlist = append(subjlist, perm[j]) subjtrees = append(subjtrees, cand[perm[j]].Tree) expressions = append(expressions, &RegEx{perm[j], expns[perm[j]].Expr}) fmt.Printf("rx { %d } %s\n", perm[j], expns[perm[j]].Expr) } //construct multiDFA fmt.Printf("Constructing multiDFA it = %d\n", it) dfa := rx.MultiDFA(subjtrees) //dfa = dfa.Minimize() h := dfa.Synthesize() //for _, ex := range h { //fmt.Printf("eg %s %s\n", ex.RXset, ex.Example) //} u := make([]*rx.BitSet, 0, len(h)) //possible questions for i := 0; i < len(h); i++ { u = append(u, h[i].RXset) } //candidate set alive := new(rx.BitSet) for i := 0; i < len(expressions); i++ { alive.Set(expressions[i].Index) } //fmt.Println("alive = ", alive.String()); //find best question and ask it i := 0 n := float64(alive.Count() / 2) size := math.Floor(n) if alive.Count() < 4 && math.Mod(float64(alive.Count()), 2) == 1 { size = size + 1 } cur := u[i].Count() c := float64(cur) hM := float64(math.Abs(c - size)) word = h[i].Example for i := 0; i < len(u); i++ { c = float64(u[i].Count()) if math.Abs(c-size) == 0 { word = h[i].Example break } else if math.Abs(c-size) < hM { hM = math.Abs(c - size) word = h[i].Example } else { /*do nothing*/ } } fmt.Println("before qns append") qns = append(qns, word) } //rate questions and print rating rate := rateQns(qns, cand) for i := 0; i < len(rate); i++ { fmt.Println(rate[i], " dfas accepted this word ", qns[i]) } for i := 0; i < len(qns); i++ { begin: //ask questions fmt.Println("Does your language accept this word (): ", qns[i]) var input string _, e1 := fmt.Scan(&input) if e1 != nil { fmt.Printf("err = %#v\n", e1) os.Exit(2) } if input == "Yes" || input == "y" || input == "Y" || input == "yes" { answers.Set(i) } else if input == "No" || input == "N" || input == "n" || input == "no" { } else { fmt.Println("That is not a valid expression, please try again.") fmt.Println("Does your language accept this word: ", word) goto begin } } fmt.Println("answers = ", answers.String()) return answers, qns }
// main control func main() { filename := cmdline() // process command line maxEx := *numElem * 4 exprs, trees := load(filename) // load expressions qns := make([]string, 0, maxEx) //create list of question words answers := new(rx.BitSet) //create list of answers dfaList := make([]*rx.DFA, 0, len(trees)) //list of candidate DFAs ind := make([]int, 0, len(dfaList)) //index array to track id of each regex a := 0 init := 0 if *mode == 1 { //run with examples //ask for examples fmt.Println("Enter some examples: ") //read examples r := bufio.NewReader(os.Stdin) line, err := r.ReadString(delim) if err != nil { fmt.Println(err) os.Exit(1) } for line != string('\n') { line = line[:len(line)-1] qns = append(qns, line) answers.Set(a) a++ line, err = r.ReadString(delim) if err != nil { fmt.Println(err) os.Exit(1) } } //ask for counter examples fmt.Println("\nEnter some counter-examples: ") rr := bufio.NewReader(os.Stdin) //read counter examples line, err = rr.ReadString(delim) if err != nil { fmt.Println(err) os.Exit(1) } for line != string('\n') { line = line[:len(line)-1] qns = append(qns, line) line, err = rr.ReadString(delim) if err != nil { fmt.Println(err) os.Exit(1) } } fmt.Printf("Processing ...\n") //build the DFAs of all candidates for i := 0; i < len(trees); i++ { aDfa := rx.BuildDFA(trees[i]) dfaList = append(dfaList, aDfa) //fmt.Printf("rx { %d } %s\n", i, exprs[i].Expr) } fmt.Printf("\n") //refine the candidate set based on examples and counter examples dfaList, exprs, ind = refine(answers, qns, dfaList, exprs, ind) init++ fmt.Printf("Length of dfaList: %d\n", len(dfaList)) for i := 0; i < len(ind); i++ { fmt.Printf("rx { %d } %s\n", ind[i], exprs[i].Expr) } } else if *mode == 2 { //partition numPerGroup := *numElem //build DFAs of all candidates for i := 0; i < len(trees); i++ { aDfa := rx.BuildDFA(trees[i]) dfaList = append(dfaList, aDfa) //fmt.Printf("rx { %d } %s\n", i, exprs[i].Expr) } times := 0 for len(dfaList) > 50 { permArray := rand.Perm(len(dfaList)) //randomly permute candidate set qns = make([]string, 0, maxEx) //initialize new set of questions answers = new(rx.BitSet) //initialize new set of answers //ask best question in each partition. Return questions and answers answers, qns = askBulk(answers, qns, permArray, dfaList, exprs, numPerGroup) fmt.Println("Here length of dfaList =", len(dfaList)) //refine the candidate set based on questions ans answers dfaList, exprs, ind = refine(answers, qns, dfaList, exprs, ind) init++ fmt.Printf("Length of dfaList: %d\n", len(dfaList)) times++ } } else if *mode == 3 { //ask all DFAs numPerGroup := *numElem //build DFA's of all candidates for i := 0; i < len(trees); i++ { aDfa := rx.BuildDFA(trees[i]) dfaList = append(dfaList, aDfa) fmt.Printf("rx { %d } %s\n", i, exprs[i].Expr) } times := 0 for len(dfaList) > 50 { //fmt.Println("iteration = ", times) //default partition is 20 per group permArray := rand.Perm(len(dfaList)) //randomly permute candidate set qns = make([]string, 0, maxEx) //initialize new set of questions answers = new(rx.BitSet) //initialize new set of answers //ask best question based on number of DFAs that accept question word answers, qns = askBulk2(answers, qns, permArray, dfaList, exprs, numPerGroup) fmt.Println("Here length of dfaList =", len(dfaList)) //fmt.Printf("\n") //refine candidate set based on questions and answers dfaList, exprs, ind = refine(answers, qns, dfaList, exprs, ind) init++ fmt.Printf("Length of dfaList: %d\n", len(dfaList)) times++ } } else { //mode = 0, run basic algorithm //build DFA's of all candidates for i := 0; i < len(trees); i++ { aDfa := rx.BuildDFA(trees[i]) dfaList = append(dfaList, aDfa) //fmt.Printf("rx { %d } %s\n", i, exprs[i].Expr) } } param := 1 value := *numElem tempLen := 0 done := false numPerGroup := 5 for len(dfaList) > param && !done { subjlist := make([]int, 0, *group) subjtrees := make([]rx.Node, 0, *group) expressions := make([]*RegEx, 0, len(dfaList)) if init == 0 { //if this is the first iteration, mode = 0 //pick a random subset for i := 0; i < *group && i < len(dfaList); i++ { j := rand.Intn(len(dfaList)) // naive; can duplicate subjlist = append(subjlist, j) subjtrees = append(subjtrees, dfaList[j].Tree) expressions = append(expressions, &RegEx{j, exprs[j].Expr}) fmt.Printf("rx { %d } %s\n", j, exprs[j].Expr) } init++ } else if len(dfaList) < value { //if the number of dfa's is small enough, run qns on all remaining for i := 0; i < len(dfaList); i++ { //j := rand.Intn(len(dfaList)) // naive; can duplicate subjlist = append(subjlist, i) subjtrees = append(subjtrees, dfaList[i].Tree) expressions = append(expressions, &RegEx{ind[i], exprs[i].Expr}) fmt.Printf("rx { %d } %s\n", ind[i], exprs[i].Expr) } } else { //if number of dfa's is too large, do nothing } qns = make([]string, 0, maxEx) //initialize new set of questions answers = new(rx.BitSet) //initialize new set of answers permArray := rand.Perm(len(dfaList)) //randomly permute candidate DFAs fmt.Println("len dfa list = ", len(dfaList)) //ask best question based on number of candidates that accept question word answers, qns = askBulk2(answers, qns, permArray, dfaList, exprs, numPerGroup) fmt.Printf("\n") fmt.Println("before length of dfaList = ", len(dfaList)) tempLen = len(dfaList) //store current length //refine the candidate set based on questions and answers dfaList, exprs, ind = refine(answers, qns, dfaList, exprs, ind) fmt.Println("length of dfaList = ", len(dfaList)) //if refinement did not change candidate list then we have equivalent reg exp if tempLen == len(dfaList) { done = true fmt.Println("Main: There are ", len(dfaList), " reg exs that match your query and they are equivalent:") for i := 0; i < len(dfaList); i++ { fmt.Printf("rx { %d } %s\n ", ind[i], exprs[i].Expr) } } } //if one remaining reg ex, print it if len(dfaList) == 1 { fmt.Printf("\nMain: The reg ex you are looking for is: rx { %d } %s\n\nMETA DATA: \n", ind[0], exprs[0].Expr) // print accumulated metadata exprs[0].ShowMeta(os.Stdout, "") fmt.Printf("\n") } //if no remaining reg exs, no match in our library if len(dfaList) == 0 { fmt.Printf("Main: No reg ex in our library matches your query.\n") } }
//original recursive algorithm that attempts to eliminate half candidates with each question func askQuestions(u []*rx.BitSet, h []rx.DFAexample, e []*RegEx, alive *rx.BitSet, track int, iter int, answers *rx.BitSet, qns []string, cand []*rx.DFA) ([]*rx.BitSet, []rx.DFAexample, *rx.BitSet, []string) { t := make([]*rx.BitSet, 0, len(u)) k := make([]rx.DFAexample, 0, len(h)) //none left alive, means there is no match in the corpus if alive.Count() == 0 { fmt.Println("No reg ex in our library matches your query.") fmt.Println("answers after iter = ", iter, " ", answers.String()) return t, k, answers, qns } //if there was at least 1 yes answer and only 1 remaining expr alive, it must be accepted by the target if track >= 1 { if alive.Count() == 1 { if findQn(alive, h) != "" { answers.Set(iter) qns = append(qns, findQn(alive, h)) } fmt.Println("answers after iter = ", iter, " ", answers.String()) return t, k, answers, qns } } //if there were 0 yes answers and there is 1 remaining, must ask again or return no match if track == 0 && alive.Count() == 1 { if len(u) == 0 { fmt.Println("No reg ex in our library matches your query.") fmt.Println("answers after iter = ", iter, " ", answers.String()) return t, k, answers, qns } else { goto askAgain } askAgain: //ask last question qns = append(qns, h[0].Example) fmt.Println("Does your language accept this word: ", h[0].Example) var input1 string _, e1 := fmt.Scan(&input1) if e1 != nil { fmt.Printf("err = %#v\n", e1) os.Exit(2) } if input1 == "Yes" || input1 == "y" || input1 == "Y" || input1 == "yes" { track++ answers.Set(iter) fmt.Println("answers after iter = ", iter, " ", answers.String()) return t, k, answers, qns } else if input1 == "No" || input1 == "N" || input1 == "n" || input1 == "no" { fmt.Println("No reg ex in our library matches your query.") fmt.Println("answers after iter = ", iter, " ", answers.String()) return t, k, answers, qns } else { fmt.Println("That is not a valid expression, please try again.") goto askAgain } } //if 0 yes answers, and no more questions, no match found if track == 0 && len(u) == 0 { fmt.Println("No reg ex in our library matches your query.") return t, k, answers, qns } //if no more questions but more than 1 candidate, find the next question if len(u) == 0 && alive.Count() > 1 { qns = append(qns, findQn(alive, h)) return t, k, answers, qns } //Iterate through u to find the rx.BitSet with numleft/2 bits set to 1 i := 0 n := float64(alive.Count() / 2) size := math.Floor(n) //if small enough set, take the cieling if alive.Count() < 4 && math.Mod(float64(alive.Count()), 2) == 1 { size = size + 1 } cur := u[i].Count() c := float64(cur) hM := float64(math.Abs(c - size)) nextQn := u[i] word := h[i].Example examInt := i //find the region that is the intersection of n/2 regexs (or closest to n/2) for i := 0; i < len(u); i++ { c = float64(u[i].Count()) if math.Abs(c-size) == 0 { nextQn = u[i] word = h[i].Example examInt = i break } else if math.Abs(c-size) < hM { hM = math.Abs(c - size) nextQn = u[i] word = h[i].Example examInt = i } else { /*do nothing*/ } } qns = append(qns, word) //append question to list rate := rateQns(qns, cand) //rate the questions for i := 0; i < len(rate); i++ { fmt.Println(rate[i], " dfas accepted this word ", qns[i]) } //ask if the best question word is accepted fmt.Println("Does your language accept this word: ", word) begin: var input string _, e1 := fmt.Scan(&input) if e1 != nil { fmt.Printf("err = %#v\n", e1) os.Exit(2) } //Refine the candidate set based on the answer if input == "Yes" || input == "y" || input == "Y" || input == "yes" { track++ if nextQn.Count() == 1 { t = append(t, nextQn) k = append(k, h[examInt]) answers.Set(iter) fmt.Println("answers after iter = ", iter, " ", answers.String()) return t, k, answers, qns } answers.Set(iter) alive = alive.And(nextQn) for i := 0; i < len(u); i++ { if !((nextQn.And(u[i])).IsEmpty()) && !(nextQn.Equals(u[i])) { t = append(t, u[i]) k = append(k, h[i]) } } fmt.Println("alive =", alive.String()) } else if input == "No" || input == "N" || input == "n" || input == "no" { bs := new(rx.BitSet) for i := 0; i < len(e); i++ { if !(nextQn.Test(e[i].Index)) { bs.Set(e[i].Index) } } alive = bs.And(alive) for i := 0; i < len(u); i++ { if !((bs.And(u[i])).IsEmpty()) { t = append(t, u[i]) k = append(k, h[i]) } } fmt.Println("alive = ", alive.String()) } else { fmt.Println("That is not a valid expression, please try again.") fmt.Println("Does your language accept this word: ", word) goto begin } fmt.Println("answers after iter = ", iter, " ", answers.String()) iter++ //recursive call on the new candidate set return askQuestions(t, k, e, alive, track, iter, answers, qns, cand) }