362 lines
10 KiB
C++
362 lines
10 KiB
C++
#include "lexi.h"
|
|
|
|
// Scans the input one character at a time, appending tokens to the member
// list `tokens`, and writes the outcome to `file_name`.
//
// data      - source text, one entry per line. It is taken by value, so the
//             edits the helper routines make to it while scanning stay local
//             to this call.
// file_name - output path handed to write_to_file().
//
// Scanning stops early, after write_to_file(file_name, line) has been called
// with the 1-based line number, when string_finder() reports "error" or when
// the classifier yields "error". Otherwise write_to_file(file_name) is called
// after the last line.
//
// Returns `tokens` as it stands when scanning stops.
vector<token> lexi::lexical_analyzer(vector<string> data, string file_name) {
    string current;    // character under the cursor, as a one-character string
    string lookahead;  // the character after it, or "" at the end of the line

    for(unsigned int row = 0; row < data.size(); row++) {
        // The line length is re-read on every pass because the helpers
        // rewrite data[row] as they consume lexemes.
        for(unsigned int col = 0; col < data[row].size(); col++) {
            current = data[row].at(col);
            if(col < data[row].size() - 1) {
                lookahead = data[row].at(col + 1);
            }
            else {
                lookahead = "";
            }

            const string state = determiner(current, lookahead);

            // Each handler acts only when `state` names its category.
            simple_state(data, state, current, lookahead, row, col);
            simple_state_string(data, state, current, lookahead, row, col);

            if(state == "start of string") {
                string literal = string_finder(data, row, col);
                if(literal == "error") {
                    write_to_file(file_name, row + 1);
                    return tokens;
                }
                // Drop the opening quote that string_finder leaves in front.
                literal.erase(0, 1);
                token t("STRING", literal, row + 1);
                tokens.push_back(t);
            }

            simple_comment(data, state, current, lookahead, row, col);
            simple_id(data, state, current, lookahead, row, col);

            if(state == "error") {
                write_to_file(file_name, row + 1);
                return tokens;
            }
        }
    }

    write_to_file(file_name);
    return tokens;
}
|
|
|
|
// Overwrites `file_name` with the single line "Error on line <line>".
//
// file_name - path of the file to (re)create.
// line      - line number to report; printed as given.
void lexi::write_to_file(string file_name, int line) {
    ofstream report(file_name.c_str());
    report << "Error on line " << line << endl;
    report.close();
}
|
|
|
|
// Overwrites `file_name` with every collected token, one per line, followed
// by a "Total Tokens = <count>" line.
//
// file_name - path of the file to (re)create.
//
// The output is unchanged from the earlier version, which guarded the write
// with a test that could never fail inside the loop (i < tokens.size()); that
// unreachable alternative has been removed.
void lexi::write_to_file(string file_name) {
    ofstream out(file_name.c_str());

    for(unsigned int k = 0; k < tokens.size(); k++) {
        out << tokens[k] << '\n';
    }

    out << "Total Tokens = " << tokens.size() << '\n';
    out.close();
}
|
|
|
|
|
|
// Handles the "comment" state: hands the position (line i, column j) to
// comment_finder(). Any other state is a no-op.
//
// comment_finder() is called for its effect on `data`; the text it returns is
// not used. `cur_string` and `next_character` are not consulted.
//
// Always returns true.
bool lexi::simple_comment(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
    if(state != "comment") {
        return true;
    }

    comment_finder(data, i, j);
    return true;
}
|
|
|
|
// Handles the "id" state: extracts the identifier that starts at line i,
// column j via id_finder() and records it as an "ID" token tagged with the
// 1-based line number. Any other state is a no-op.
//
// No token is recorded if id_finder() hands back the string "error".
// `cur_string` and `next_character` are not consulted.
//
// Always returns true.
bool lexi::simple_id(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
    if(state != "id") {
        return true;
    }

    string lexeme = id_finder(data, i, j);
    if(lexeme == "error") {
        return true;
    }

    token t("ID", lexeme, i + 1);
    tokens.push_back(t);
    return true;
}
|
|
|
|
// Handles the "simple_string" state, i.e. a position that might start one of
// the reserved words. Any other state is a no-op.
//
// First asks det_type_simple_string() for a keyword at line i, column j. If
// it answers anything other than "wrong", a token is recorded whose type
// comes from type_simple_string(). Otherwise the text is read as an ordinary
// identifier with id_finder() and recorded as "ID", unless that call yields
// "error". Tokens carry the 1-based line number.
//
// `cur_string` and `next_character` are not consulted.
//
// Always returns true.
bool lexi::simple_state_string(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
    if(state != "simple_string") {
        return true;
    }

    string keyword = det_type_simple_string(data, i, j);
    if(keyword != "wrong") {
        string kind = type_simple_string(keyword);
        token t(kind, keyword, i + 1);
        tokens.push_back(t);
        return true;
    }

    // Not a reserved word after all: fall back to a plain identifier.
    string identifier = id_finder(data, i, j);
    if(identifier != "error") {
        token t("ID", identifier, i + 1);
        tokens.push_back(t);
    }
    return true;
}
|
|
|
|
// Handles the "simple" state: records the punctuation token found at line i,
// column j and blanks it out of data[i]. Any other state is a no-op.
//
// data           - line buffer; the consumed symbol is replaced by one space.
// cur_string     - the symbol under the cursor, as a one-character string.
// next_character - the following character, used to recognise ":-".
//
// The token text comes from type_simple() and its type name from
// type_simple_caps(); the token is tagged with the 1-based line number.
//
// Always returns true.
bool lexi::simple_state(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
    if(state == "simple") {
        string token_symbol = type_simple(cur_string, next_character);
        string token_id = type_simple_caps(cur_string, next_character);

        // Only ":-" is two characters wide. Checking the dash alone would
        // also erase a '-' that follows any other symbol (for example "(-"),
        // hiding it from the rest of the scan.
        const bool is_colon_dash = (cur_string == ":" && next_character == "-");
        data[i].replace(j, is_colon_dash ? 2 : 1, " ");

        token t(token_id, token_symbol, i + 1);
        tokens.push_back(t);
    }

    return true;
}
|
|
|
|
// Classifies the character under the cursor and names the state the scanner
// should act on.
//
// cur_string     - the current character, as a one-character string.
// next_character - the character after it (used by the lookahead checks).
//
// The checks run in the order below and the first match wins:
//   "simple"          - det_help_simple() accepts the pair
//   "simple_string"   - det_help_simple_string() accepts the pair
//   "id"              - det_help_id() accepts the character
//   "start of string" - the character is a single quote
//   "comment"         - the character is '#'
// Anything else is passed to incorrect() and its answer is returned as is.
string lexi::determiner(string cur_string, string next_character) {
    if(det_help_simple(cur_string, next_character)) {
        return "simple";
    }
    if(det_help_simple_string(cur_string, next_character)) {
        return "simple_string";
    }
    if(det_help_id(cur_string)) {
        return "id";
    }
    if(cur_string == "'") {
        return "start of string";
    }
    if(cur_string == "#") {
        return "comment";
    }
    return incorrect(cur_string);
}
|
|
|
|
// Reports whether the first character of `cur_string` is an ASCII letter
// (A-Z or a-z).
bool lexi::det_help_id(string cur_string) {
    const char first = cur_string[0];
    const bool is_upper = ('A' <= first && first <= 'Z');
    const bool is_lower = ('a' <= first && first <= 'z');
    return is_upper || is_lower;
}
|
|
|
|
// Reports whether the pair (a, b) is exactly ("S", "c").
bool lexi::quick_help(string a, string b) {
    return a == "S" && b == "c";
}
|
|
|
|
// Reports whether the current character and its successor form one of the
// two-letter prefixes "Sc", "Qu", "Ru" or "Fa".
//
// cur_string     - the current character, as a one-character string.
// next_character - the character after it.
bool lexi::det_help_simple_string(string cur_string, string next_character) {
    if(quick_help(cur_string, next_character)) {  // the "Sc" pair
        return true;
    }

    const bool is_qu = (cur_string == "Q" && next_character == "u");
    const bool is_ru = (cur_string == "R" && next_character == "u");
    const bool is_fa = (cur_string == "F" && next_character == "a");
    return is_qu || is_ru || is_fa;
}
|
|
|
|
// Reports whether `cur_string` is one of the single-character punctuation
// symbols  ,  .  ?  (  )  :
//
// cur_string     - the current character, as a one-character string.
// next_character - kept for signature compatibility; it does not affect the
//                  result.
//
// An earlier version also called type_simple() here and threw the result
// away; that call had no effect and has been dropped.
bool lexi::det_help_simple(string cur_string, string next_character) {
    (void)next_character;  // intentionally unused

    return cur_string == "," || cur_string == "." || cur_string == "?" ||
           cur_string == "(" || cur_string == ")" || cur_string == ":";
}
|
|
|
|
// Fallback classification for a character that no other check claimed.
//
// Returns "fine" for a space or a tab, "error" when the first character is
// not an ASCII letter, and " " when it is a letter.
string lexi::incorrect(string cur_string) {
    if(cur_string == " " || cur_string == "\t") {
        return "fine";
    }

    const char first = cur_string[0];
    const bool is_letter = ('A' <= first && first <= 'Z') ||
                           ('a' <= first && first <= 'z');
    if(is_letter) {
        return " ";
    }
    return "error";
}
|
|
|
|
// Extracts the run of characters that starts at column b of line a and ends
// just before the first character is_char_valid() accepts (or at the end of
// the line), replaces that run in data[a] with a single space, and returns
// the extracted text.
//
// data - line buffer; data[a] is edited in place.
// a    - index of the line.
// b    - column at which the run starts.
//
// Returns " " when column b is not inside the line.
string lexi::id_finder(vector<string> & data, int a, int b) {
    string & line = data[a];
    const unsigned int first = b;

    for(unsigned int last = first; last < line.size(); last++) {
        // '!' stands in for "nothing follows" at the end of the line.
        const char following = (last < line.size() - 1) ? line.at(last + 1) : '!';

        if(is_char_valid(following) || following == '!') {
            const string lexeme = line.substr(first, last - first + 1);
            line.replace(line.begin() + b, line.begin() + last + 1, " ");
            return lexeme;
        }
    }

    return " ";
}
|
|
|
|
// Consumes a comment: everything from column b to the end of line i is
// replaced in data[i] by a single space, and the removed text is returned.
//
// data - line buffer; data[i] is edited in place.
// i    - index of the line.
// b    - column at which the comment starts.
//
// Returns "error", leaving the line untouched, when column b is not inside
// the line.
//
// The earlier loop tested `(j > data[i].size()) - 1`, a misplaced parenthesis
// that made the term always non-zero, and used "!" as an end-of-line marker
// that a real '!' in the text could not be told apart from. As a result a
// comment such as "#!" or a lone "#" was never blanked out, and the returned
// text was cut short. Taking the rest of the line directly avoids both
// problems.
string lexi::comment_finder(vector<string> & data, int i, int b) {
    string & line = data[i];

    if(b < 0 || static_cast<string::size_type>(b) >= line.size()) {
        return "error";
    }

    const string comment = line.substr(b);
    line.replace(b, string::npos, " ");
    return comment;
}
|
|
|
|
// Extracts a quoted string from line a.
//
// data - line buffer; data[a] is edited in place on success.
// a    - index of the line.
// b    - not used as the starting point: the scan always begins at the first
//        single quote found in the line.
//
// On success the text from the opening quote through the closing quote is
// replaced in data[a] by two spaces, and the opening quote plus the string's
// contents (without the closing quote) is returned.
//
// Returns "error", leaving the line untouched, when the line has no quote or
// no second quote after the first.
string lexi::string_finder(vector<string> & data, int a, int b) {
    string & line = data[a];

    const string::size_type open = line.find('\'');
    if(open == string::npos) {
        return "error";
    }

    const string::size_type close = line.find('\'', open + 1);
    if(close == string::npos) {
        return "error";
    }

    const string literal = line.substr(open, close - open);
    line.replace(open, close - open + 1, "  ");
    return literal;
}
|
|
|
|
// Maps a punctuation symbol to its token type name.
//
// symbol      - the symbol, as a one-character string.
// next_symbol - the character after it; only consulted for ':' to tell
//               "COLON_DASH" (":-") from "COLON".
//
// Returns "" for anything that is not one of  ,  .  ?  (  )  :
string lexi::type_simple_caps(string symbol, string next_symbol) {
    if(symbol.size() != 1) {
        return "";
    }

    switch(symbol[0]) {
        case ',': return "COMMA";
        case '.': return "PERIOD";
        case '?': return "Q_MARK";
        case '(': return "LEFT_PAREN";
        case ')': return "RIGHT_PAREN";
        case ':':
            if(next_symbol == "-") {
                return "COLON_DASH";
            }
            return "COLON";
        default:
            return "";
    }
}
|
|
|
|
// Maps a punctuation symbol to the text of its token.
//
// symbol      - the symbol, as a one-character string.
// next_symbol - the character after it; only consulted for ':' to tell
//               ":-" from ":".
//
// Returns "" for anything that is not one of  ,  .  ?  (  )  :
string lexi::type_simple(string symbol, string next_symbol) {
    if(symbol.size() != 1) {
        return "";
    }

    switch(symbol[0]) {
        case ',': return ",";
        case '.': return ".";
        case '?': return "?";
        case '(': return "(";
        case ')': return ")";
        case ':':
            if(next_symbol == "-") {
                return ":-";
            }
            return ":";
        default:
            return "";
    }
}
|
|
|
|
// Tries to read a reserved word starting at column b of line i.
//
// data - line buffer; data[i] is edited in place on success.
// i    - index of the line.
// b    - column at which the candidate word starts.
//
// Characters are accumulated from column b. The accumulated text is accepted
// as soon as is_simple_string() recognises it and is_char_valid() accepts
// both the character after it (or '!' at the end of the line) and the
// character just before column b (if there is one). The word is then replaced
// in data[i] by a single space and returned.
//
// Returns "wrong", leaving the line untouched, when no such word is found.
string lexi::det_type_simple_string(vector<string> & data, int i, int b) {
    string & line = data[i];

    string before;  // character left of the candidate; stays empty at column 0
    if(b > 0) {
        before = line.at(b - 1);
    }

    string candidate;
    for(unsigned int pos = b; pos < line.size(); pos++) {
        candidate += line.at(pos);

        // '!' stands in for "nothing follows" at the end of the line.
        const char after = (pos < line.size() - 1) ? line.at(pos + 1) : '!';

        if(!is_simple_string(candidate)) {
            continue;
        }
        if(!is_char_valid(after)) {
            continue;
        }
        if(!is_char_valid(before[0])) {
            continue;
        }

        line.replace(line.begin() + b, line.begin() + pos + 1, " ");
        return candidate;
    }

    return "wrong";
}
|
|
|
|
// Reports whether `next_character` is something other than an ASCII letter
// or digit. Note the sense of the result: letters and digits yield false,
// everything else (including the single quote, which is also tested
// explicitly) yields true.
bool lexi::is_char_valid(char next_character) {
    const bool is_upper = ('A' <= next_character && next_character <= 'Z');
    const bool is_lower = ('a' <= next_character && next_character <= 'z');
    const bool is_digit = ('0' <= next_character && next_character <= '9');
    const bool is_alnum = is_upper || is_lower || is_digit;

    return !is_alnum || next_character == '\'';
}
|
|
|
|
// Reports whether `simple_com` is exactly one of the reserved words
// "Schemes", "Facts", "Rules" or "Queries" (case-sensitive).
bool lexi::is_simple_string(string simple_com) {
    return simple_com == "Schemes" ||
           simple_com == "Facts"   ||
           simple_com == "Rules"   ||
           simple_com == "Queries";
}
|
|
|
|
// Maps a reserved word to its token type name: "Schemes" -> "SCHEMES",
// "Facts" -> "FACTS", "Rules" -> "RULES", "Queries" -> "QUERIES".
//
// Returns "" for any other input (the match is case-sensitive).
string lexi::type_simple_string(string simple_com) {
    static const char * const table[][2] = {
        { "Schemes", "SCHEMES" },
        { "Facts",   "FACTS"   },
        { "Rules",   "RULES"   },
        { "Queries", "QUERIES" }
    };
    const unsigned int entries = sizeof(table) / sizeof(table[0]);

    for(unsigned int k = 0; k < entries; k++) {
        if(simple_com == table[k][0]) {
            return table[k][1];
        }
    }
    return "";
}
|