String-to-RegExp conversion is almost fully working; it does not yet handle recursive (nested) parentheses, but *shrug*.

This commit is contained in:
Yorick Rommers
2017-06-11 20:39:56 +02:00
parent d11c23841c
commit e1273ab65e
+69 -408
View File
@@ -1,7 +1,6 @@
package com.imegumii;
import javafx.scene.Parent;
import jdk.nashorn.internal.runtime.regexp.joni.Regex;
import sun.misc.Regexp;
import java.util.*;
@@ -42,104 +41,62 @@ public class RegExp extends Importable{
rechts = null;
}
public RegExp splitOffInParenthesesGroups(String s) {
ArrayList<String> parantheses = new ArrayList<>();
public RegExp loopRecursiefDoorString(String s, RegExp r) {
Deque<Integer> start = new ArrayDeque<>();
Deque<Integer> end = new ArrayDeque<>();
RegExp retval = new RegExp();
ArrayList<Tuple<Integer, Integer>> segments = new ArrayList<>();
// Vind haakjessets
System.out.println("HAKKE");
int counter = 0;
boolean cutOff = false;
int lastIndex = 0;
// for (int i = 0; i < s.length(); i++) {
// char current = s.charAt(i);
// if (current == '(') {
// if (cutOff) {
//// System.out.println("Substring van HAKKE is " + s.substring(lastIndex, i));
// parantheses.add(s.substring(lastIndex, i));
// lastIndex = i;
// cutOff = false;
// }
// counter++;
// }
// if (current == ')') {
// counter--;
// }
// if (counter == 0) {
// cutOff = true;
// }
// }
Deque<RegExp> processedParantheses = new ArrayDeque<>();
// System.out.println("Substring van HAKKE is " + s.substring(lastIndex, s.length()));
parantheses.add(s.substring(lastIndex, s.length()));
System.out.println("Haakjesset");
System.out.println(parantheses);
for (String paranthesis : parantheses) {
findInParantheses(paranthesis, new RegExp());
}
return retval;
}
public RegExp findInParantheses(String s, RegExp startRegExp) {
RegExp retval = startRegExp;
System.out.println("Begin with string " + s + " in findin");
Deque<Integer> left = new ArrayDeque<>();
Deque<Integer> right = new ArrayDeque<>();
ArrayList<RegExp> toDot = new ArrayList<>();
// Vind haakjes
String toProcessAfter = s;
for (int i = 0; i < s.length(); i++) {
char current = s.charAt(i);
char c = s.charAt(i);
if (current == '(') {
left.add(i);
}
if (current == ')') {
right.add(i);
if (c == '(') {
start.addLast(i);
}
// Combine the last of left with the first of right
if (!left.isEmpty() && !right.isEmpty()) {
int start = left.pollLast();
int end = right.pollFirst();
String substring = s.substring(start + 1, end);
if (c == ')') {
end.addFirst(i);
}
StringBuilder toReplace = new StringBuilder(s.substring(start, end + 1));
StringBuilder replaceWith = new StringBuilder();
replaceWith.append(Transition.ENDCHAR);
String replacedString = s.replace(toReplace, replaceWith);
// Als we een paar haakjes hebben gevonden
retval = retval.punt(toRegExp(substring, startRegExp)); // Voeg de regex van de substring toe aan retval
retval = retval.punt(findInParantheses(replacedString, retval)); // Haal de substring weg uit de string, vind hiervan de regex en voeg die toe aan retval
return retval;
if (!start.isEmpty() && !end.isEmpty()) {
int startPos = start.pollLast();
int endPos = end.pollFirst();
String sub = s.substring(startPos + 1, endPos);
// System.out.println("Sub is " + sub);
StringBuilder toReplace = new StringBuilder(s.substring(startPos, endPos + 1));
StringBuilder replaceWith = new StringBuilder("" + Transition.ENDCHAR);
// System.out.println("Toreplace is " + toReplace.toString());
toProcessAfter = toProcessAfter.replace(toReplace, replaceWith);
processedParantheses.add(stringNaarRegExp(sub, new RegExp(), null));
}
}
if (!s.contains("(") && !s.contains(")")) {
retval = retval.punt(toRegExp(s)); // Als we geen haakjes hebben, dan pakken we de huidige (sub)string en voegen we die toe aan retval
}
return retval;
RegExp reg = stringNaarRegExp(toProcessAfter, r, processedParantheses);
// System.out.println(reg.getTaal(10));
// System.out.println("----------");
return reg;
}
public static RegExp toRegExp(String start) {
return toRegExp(start, new RegExp());
public RegExp naarRegExp(String s) {
// String s is zonder spaces
// RegExp retval = new RegExp();
// Als eerste moet de string in groupen gedeeld worden op basis van haakjes.
// bijv. "a+((ab)*b|ab|(b)*bb)+(abba|baab)+" naar ["a+", [ [["ab"], "*b"], ["ab"], [["b"], "*bb"], "+" ], [ ["abba", "baab"], "+"] ]
// Dit moet dan naar dit worden omgezet:
// a.plus().punt( (ab.ster().punt(b).of(ab).of(b.ster().punt(bb)).plus()) ).punt( abba.punt().baab.plus() )
// Niet gelukt.
// System.out.println("We lopen door " + s);
RegExp r = loopRecursiefDoorString(s, new RegExp());
return r;
}
public static RegExp toRegExp(String start, RegExp startRegExp) {
int maxOps = 200;
System.out.println("Begin met " + start + " in toRegExp");
System.out.println("STARTREG IS ");
System.out.println(startRegExp.getTaal(maxOps));
RegExp retval = new RegExp();
public RegExp stringNaarRegExp(String start, RegExp initieel, Deque<RegExp> alreadyParsed) {
ArrayList<RegExp> toOf = new ArrayList<>();
ArrayList<String> strings = new ArrayList<>();
@@ -159,12 +116,14 @@ public class RegExp extends Importable{
strings.add(currentString.toString());
System.out.println("OfStrings: " + strings);
// System.out.println("OfStrings: " + strings);
ArrayList<RegExp> toDot = new ArrayList<>();
RegExp retval = new RegExp();
toDot.add(initieel);
for (String s : strings) {
System.out.println("Parsing " + s);
RegExp total = new RegExp();
ArrayList<RegExp> toDot = new ArrayList<>();
for (int i = 0; i < s.length(); i++) {
char current = s.charAt(i);
RegExp prev;
@@ -173,39 +132,28 @@ public class RegExp extends Importable{
prev = toDot.get(toDot.size() -1);
toDot.add(prev.ster());
toDot.remove(prev);
// System.out.println(prev.getTaal(5));
// System.out.println(prev.ster().getTaal(100));
// System.out.println(Main.aantalOperators(prev.ster(), 1));
System.out.println("STER");
// System.out.println("STER");
break;
case '+':
prev = toDot.get(toDot.size() -1);
toDot.add(prev.plus());
toDot.remove(prev);
System.out.println("PLUS");
// System.out.println("PLUS");
break;
case Transition.ENDCHAR:
toDot.add(startRegExp);
System.out.println("PREVIOUS REGEX");
toDot.add(alreadyParsed.pollFirst());
break;
default:
toDot.add(new RegExp("" + current));
System.out.println("NEW CHAR");
// System.out.println("Adding new regExp to toDot");
// System.out.println("NEW CHAR");
break;
}
// System.out.println("-----");
}
// System.out.println("ToDot contains");
// System.out.println(toDot);
for (RegExp regExp : toDot) {
System.out.println("Dotting " + regExp.getTaal(maxOps));
total = total.punt(regExp);
// System.out.println("Total is: L: (" + total.links + ") R: (" + total.rechts + ")");
}
System.out.println("Adding to toOf");
System.out.println(total.getTaal(maxOps));
toOf.add(total);
}
RegExp prev = null;
@@ -215,320 +163,18 @@ public class RegExp extends Importable{
prev = regExp;
continue;
}
System.out.println("Offing " + prev.getTaal(maxOps));
System.out.println("With " + regExp.getTaal(maxOps));
retval = prev.of(regExp);
System.out.println("Result " + retval.getTaal(maxOps));
System.out.println("------");
prev = retval;
} else {
retval = retval.punt(regExp);
}
}
System.out.println("toRegExp");
System.out.println(Main.aantalOperators(retval, 1));
System.out.println(retval.getTaal(200));
// Main.traverseRegExp(retval);
System.out.println("----");
return retval;
}
/**
 * Parses a textual regular expression after dropping every space character.
 *
 * @param s the raw expression; only the plain space character {@code ' '} is removed
 * @return the result of {@code naarRegExp} for the space-free string
 */
public RegExp parseString(String s) {
    String compact = s.replace(" ", "");
    return naarRegExp(compact);
}
/**
 * Reports whether the string contains as many '(' as ')' characters.
 * Only the totals are compared; the order of the parentheses is not checked.
 *
 * @param s the string to inspect
 * @return {@code true} when the number of '(' equals the number of ')'
 */
public boolean hasEvenAmountOfParantheses(String s) {
    int opened = 0;
    int closed = 0;
    for (char symbol : s.toCharArray()) {
        switch (symbol) {
            case '(':
                opened++;
                break;
            case ')':
                closed++;
                break;
            default:
                break;
        }
    }
    return opened == closed;
}
/**
 * Splits a regular-expression string into a tree of {@code RegExpGroup} nodes
 * based on its top-level parentheses, recursing into every piece that still
 * contains a '('.
 *
 * Every node created here carries {@code Operator.PUNT}.
 *
 * @param s the expression text to split
 * @return the root of the resulting group tree (at least one group is always
 *         created after the scan loop)
 */
public RegExpGroup splitsOpBasisVanHaakjes(String s) {
System.out.println("SplitsOpBasisVanHaakjes de string " + s);
// Raw text of each segment; filled but only used by the commented-out debug print below.
ArrayList<String> haakjesList = new ArrayList<>();
// One group per top-level segment, in the order they occur in s.
ArrayList<RegExpGroup> regExpGroup = new ArrayList<>();
// Current parenthesis nesting depth.
int counter = 0;
// Set once the depth is 0 after a character; the next '(' then closes the current segment.
boolean cutOff = false;
// Start index of the segment currently being scanned.
int lastIndex = 0;
// Indices of the '(' seen at depth 0 and of the ')' that brought the depth back to 0; -1 = none yet.
int leftLocation = -1;
int rightLocation = -1;
for (int i = 0; i < s.length(); i++) {
char current = s.charAt(i);
// We can look for |'s if we only have root parentheses
if (current == '(') {
if (cutOff) {
// A new top-level '(' starts: emit the segment s[lastIndex, i).
String sub = s.substring(lastIndex, i);
haakjesList.add(sub);
if (leftLocation != -1 && rightLocation != -1) {
// Segment had a parenthesised part: left = text inside the parentheses,
// right = text between the closing ')' and this new '('.
regExpGroup.add(new RegExpGroup(Operator.PUNT,
new RegExpGroup(Operator.PUNT, s.substring(leftLocation + 1, rightLocation)),
new RegExpGroup(Operator.PUNT, s.substring(rightLocation + 1, i))
));
} else {
// Segment without parentheses: whole text goes left, right stays empty.
regExpGroup.add(new RegExpGroup(Operator.PUNT,
new RegExpGroup(Operator.PUNT, s.substring(lastIndex, i)),
null
));
}
// Reset the bookkeeping for the next segment, which starts at this '('.
lastIndex = i;
cutOff = false;
leftLocation = -1;
rightLocation = -1;
}
if (counter == 0) {
leftLocation = i;
}
counter++;
}
if (current == ')') {
counter--;
if (counter == 0) {
rightLocation = i;
}
}
if (counter == 0) {
cutOff = true;
}
}
// Emit the trailing segment s[lastIndex, end) using the same two cases as inside the loop.
haakjesList.add(s.substring(lastIndex, s.length()));
if (leftLocation != -1 && rightLocation != -1) {
regExpGroup.add(new RegExpGroup(Operator.PUNT,
new RegExpGroup(Operator.PUNT, s.substring(leftLocation + 1, rightLocation)),
new RegExpGroup(Operator.PUNT, s.substring(rightLocation + 1, s.length())
)));
} else {
regExpGroup.add(new RegExpGroup(Operator.PUNT,
new RegExpGroup(Operator.PUNT, s.substring(lastIndex, s.length())),
null
));
}
// System.out.println("Haakjesset");
// System.out.println(haakjesList);
// System.exit(1);
// regExpGroup.forEach(System.out::println);
// Recurse into children whose text still contains a '(' and collect the groups in order.
ArrayList<RegExpGroup> test = new ArrayList<>();
for (RegExpGroup expGroup : regExpGroup) {
if (expGroup.left != null && expGroup.left.regex != null && expGroup.left.regex.contains("(")) { // not done removing parentheses yet
expGroup.left = splitsOpBasisVanHaakjes(expGroup.left.regex);
}
if (expGroup.right != null && expGroup.right.regex != null && expGroup.right.regex.contains("(")) {
expGroup.right = splitsOpBasisVanHaakjes(expGroup.right.regex);
}
test.add(expGroup);
}
// Fold the list of groups into a single tree rooted at prev.
RegExpGroup prev = null;
for (RegExpGroup regExpGroup1 : test) {
if (test.size() > 1) {
if (prev == null) {
// The first group seeds the tree.
prev = regExpGroup1;
continue;
}
if (prev.right == null) {
// we don't need to make a new one, we can just add it right.
prev.right = regExpGroup1;
} else {
// Right slot already taken: wrap the tree so far and the new group in a fresh PUNT node.
prev = new RegExpGroup(Operator.PUNT, prev, regExpGroup1);
}
} else {
// Single group: it is the result as-is.
prev = regExpGroup1;
}
}
// System.out.println("TEST");
//// System.out.println(prev);
// prev.traverse();
return prev;
}
/**
 * Debug helper: prints every character of the segment on its own line.
 *
 * @param s the segment to print
 * @return always {@code null}
 */
public RegExp loopDoorSegment(String s) {
    for (char symbol : s.toCharArray()) {
        System.out.println(symbol);
    }
    return null;
}
/**
 * Walks the string, and for every top-level parenthesised group prints the
 * text to its left, its inner text and the text to its right, then recurses
 * into the inner text.
 *
 * @param s the expression text to scan
 * @param r passed through unchanged to the recursive calls
 * @return the result of the recursive call for the last top-level group,
 *         or {@code null} when no such group was closed
 */
public RegExp loopRecursiefDoorString(String s, RegExp r) {
    System.out.println("Loop door " + s);
    RegExp result = null;
    int depth = 0;
    int innerStart = 0;
    for (int pos = 0; pos < s.length(); pos++) {
        switch (s.charAt(pos)) {
            case '(': {
                if (depth == 0) {
                    // First character after the opening parenthesis of a top-level group.
                    innerStart = pos + 1;
                }
                depth++;
                break;
            }
            case ')': {
                boolean closesTopLevelGroup = depth == 1;
                depth--;
                if (closesTopLevelGroup) {
                    String sub = s.substring(innerStart, pos);
                    String links = s.substring(0, innerStart - 1);
                    String rechts = s.substring(pos + 1);
                    System.out.println("L");
                    System.out.println(links);
                    System.out.println("M");
                    System.out.println(sub);
                    System.out.println("R");
                    System.out.println(rechts);
                    System.out.println("----");
                    result = loopRecursiefDoorString(sub, r);
                }
                break;
            }
            default:
                break;
        }
    }
    System.out.println("RETURN");
    return result;
}
/**
 * Converts a space-free expression string into a {@code RegExp}.
 *
 * Intended approach (from the original notes): first split the string into
 * groups based on parentheses, e.g.
 * "a+((ab)*b|ab|(b)*bb)+(abba|baab)+" into
 * ["a+", [ [["ab"], "*b"], ["ab"], [["b"], "*bb"], "+" ], [ ["abba", "baab"], "+"] ],
 * and then turn that into
 * a.plus().punt( (ab.ster().punt(b).of(ab).of(b.ster().punt(bb)).plus()) ).punt( abba.punt().baab.plus() ).
 *
 * @param s the expression text, without spaces
 * @return the expression produced by {@code loopRecursiefDoorString}, or an
 *         empty {@code RegExp} when that helper yields {@code null}
 */
public RegExp naarRegExp(String s) {
    System.out.println("We lopen door " + s);
    RegExp parsed = loopRecursiefDoorString(s, new RegExp());
    if (parsed == null) {
        // The helper can return null (e.g. no parenthesised group was found);
        // previously this crashed on the debug print below.
        System.out.println("---");
        return new RegExp();
    }
    System.out.println(parsed.getTaal(20));
    System.out.println("---");
    // Return the parsed expression instead of discarding it for an unrelated empty one.
    return parsed;
}
/**
 * Builds a concatenation of single-character expressions from a flat string,
 * applying the postfix operators '*' and '+' to the expression directly
 * before them.
 *
 * @param s        the expression text to convert
 * @param initieel the expression placed first in the concatenation; a leading
 *                 '*' or '+' in {@code s} is applied to it
 * @return an empty {@code RegExp} concatenated (via {@code punt}) with every
 *         collected part, in order
 */
public RegExp stringNaarRegExp(String s, RegExp initieel) {
    List<RegExp> toDot = new ArrayList<>();
    toDot.add(initieel);
    for (int i = 0; i < s.length(); i++) {
        char current = s.charAt(i);
        // toDot always holds at least initieel, so this index is valid.
        int last = toDot.size() - 1;
        switch (current) {
            case '*':
                // Replace the last part in place. The former add-then-remove(prev)
                // removed the FIRST element equal to prev, which may not be the last one.
                toDot.set(last, toDot.get(last).ster());
                break;
            case '+':
                toDot.set(last, toDot.get(last).plus());
                break;
            case '|':
                // NOTE(review): alternation is not handled here; '|' is treated as a
                // literal character, exactly like the default branch.
            default:
                toDot.add(new RegExp("" + current));
                break;
        }
    }
    RegExp retval = new RegExp();
    for (RegExp part : toDot) {
        retval = retval.punt(part);
    }
    return retval;
}
/**
 * Node of a binary tree describing a partially split expression: either a
 * leaf holding raw expression text in {@code regex}, or an inner node with
 * {@code left} and {@code right} children.
 */
private class RegExpGroup {
    public Operator op;
    public RegExpGroup left;
    public RegExpGroup right;
    public String regex = null;

    /** Creates a leaf that holds the given expression text. */
    public RegExpGroup(Operator op, String substring) {
        this.op = op;
        this.regex = substring;
    }

    /** Creates an inner node with the given children (either may be null). */
    public RegExpGroup(Operator op, RegExpGroup left, RegExpGroup right) {
        this.op = op;
        this.left = left;
        this.right = right;
    }

    /** @return whether this node's operator is {@code Operator.OF} */
    public boolean isOf() {
        return Operator.OF == op;
    }

    /** Prints this node's text, then the left subtree, then the right subtree. */
    public void traverse() {
        System.out.println(regex);
        if (left != null) {
            left.traverse();
        }
        if (right != null) {
            right.traverse();
        }
    }

    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("(").append(left);
        text.append(" -- ").append(regex);
        text.append(" -- ").append(right).append(")");
        return text.toString();
    }
}
public RegExp plus()
{
@@ -627,6 +273,21 @@ public class RegExp extends Importable{
return new Taal(resultaat);
}
/**
 * Immutable pair of two values.
 *
 * @param <X> type of the left value
 * @param <Y> type of the right value
 */
public class Tuple<X, Y> {
    public final X left;
    public final Y right;

    /** Stores both values as given; no null checks are performed. */
    public Tuple(X left, Y right) {
        this.left = left;
        this.right = right;
    }

    /** @return the pair rendered as {@code (left,right)} */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("(").append(left);
        text.append(",").append(right).append(")");
        return text.toString();
    }
}
@Override
public String toString() {
return "Chars: " + this.characters + " en operator: " + this.operator;