#include "lexi.h"

// NOTE(review): the element types used below (vector<token> for the result,
// vector<string> for the input lines) are inferred from how the values are
// used in this file - confirm against the declarations in lexi.h.

/**
 * Scans every line of `data` character by character, appending recognised
 * tokens to the member `tokens`, and writes the outcome to `file_name`.
 *
 * Each helper blanks out the lexeme it consumed inside `data`, collapsing it
 * to whitespace that starts at the current column, so the scan simply
 * continues with the next column. `data` is taken by value, so the caller's
 * lines are not modified.
 *
 * @param data       source lines to tokenize
 * @param file_name  path of the output file
 * @return the tokens collected so far; on a lexical error an
 *         "Error on line N" report is written instead of the token list and
 *         the tokens gathered up to that point are returned.
 */
vector<token> lexi::lexical_analyzer(vector<string> data, string file_name) {
    for(unsigned int i = 0; i < data.size(); i++) {
        // data[i] can shrink while it is being scanned, so size() is
        // deliberately re-evaluated on every iteration.
        for(unsigned int j = 0; j < data[i].size(); j++) {
            string cur_string;
            string next_character;
            cur_string = data[i].at(j);
            if(j < data[i].size() - 1) {
                next_character = data[i].at(j + 1);
            }

            const string state = determiner(cur_string, next_character);

            // Each helper acts only when `state` names its own category.
            simple_state(data, state, cur_string, next_character, i, j);
            simple_state_string(data, state, cur_string, next_character, i, j);

            if(state == "start of string") {
                string token_symbol = string_finder(data, i, j);
                if(token_symbol == "error") {
                    // Unterminated string literal.
                    write_to_file(file_name, i + 1);
                    return tokens;
                }
                token_symbol.erase(0, 1); // drop the opening quote
                token t("STRING", token_symbol, i + 1);
                tokens.push_back(t);
            }

            simple_comment(data, state, cur_string, next_character, i, j);
            simple_id(data, state, cur_string, next_character, i, j);

            if(state == "error") {
                write_to_file(file_name, i + 1);
                return tokens;
            }
        }
    }
    write_to_file(file_name);
    return tokens;
}

/**
 * Writes a single "Error on line <line>" report to `file_name`.
 *
 * @param file_name  path of the output file
 * @param line       1-based line number to report
 */
void lexi::write_to_file(string file_name, int line) {
    ofstream myfile;
    myfile.open(file_name.c_str());
    myfile << "Error on line " << line << '\n';
    myfile.close();
}

/**
 * Writes every collected token, one per line, followed by a
 * "Total Tokens = <count>" line, to `file_name`.
 *
 * @param file_name  path of the output file
 */
void lexi::write_to_file(string file_name) {
    ofstream myfile;
    myfile.open(file_name.c_str());
    for(unsigned int i = 0; i < tokens.size(); i++) {
        myfile << tokens[i] << '\n';
    }
    myfile << "Total Tokens = " << tokens.size();
    myfile << '\n';
    myfile.close();
}

/**
 * When `state` is "comment", blanks the comment that starts at column `j` of
 * line `i`. No token is produced for comments.
 *
 * @return always true
 */
bool lexi::simple_comment(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
    if(state == "comment") {
        comment_finder(data, i, j);
    }
    return true;
}

/**
 * When `state` is "id", consumes the identifier that starts at column `j` of
 * line `i` and records it as an ID token.
 *
 * @return always true
 */
bool lexi::simple_id(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
    if(state == "id") {
        const string token_symbol = id_finder(data, i, j);
        // id_finder reports failure with " ". Testing against that value
        // (rather than the word "error") keeps an identifier that is
        // literally named `error` from being discarded.
        if(token_symbol != " ") {
            token t("ID", token_symbol, i + 1);
            tokens.push_back(t);
        }
    }
    return true;
}

/**
 * When `state` is "simple_string", tries to consume one of the keywords
 * Schemes / Facts / Rules / Queries at column `j` of line `i`; if the text
 * there is not a complete keyword it is consumed as an ordinary identifier.
 *
 * @return always true
 */
bool lexi::simple_state_string(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
    if(state != "simple_string") {
        return true;
    }

    const string keyword = det_type_simple_string(data, i, j);
    if(keyword != "wrong") {
        token t(type_simple_string(keyword), keyword, i + 1);
        tokens.push_back(t);
        return true;
    }

    // Not a keyword after all: treat it as an identifier.
    const string identifier = id_finder(data, i, j);
    if(identifier != " ") { // " " is id_finder's failure value
        token t("ID", identifier, i + 1);
        tokens.push_back(t);
    }
    return true;
}

/**
 * When `state` is "simple", records the punctuation token at column `j` of
 * line `i` and replaces it in `data` with a single blank.
 *
 * @return always true
 */
bool lexi::simple_state(vector<string> & data, string state, string cur_string, string next_character, int i, int j) {
    if(state == "simple") {
        const string token_symbol = type_simple(cur_string, next_character);
        // Blank exactly the characters that make up the token. Only ":-" is
        // two characters long; a '-' following any other symbol must stay in
        // the line so that it is still examined by the scanner.
        data[i].replace(j, token_symbol.size(), " ");
        const string token_id = type_simple_caps(cur_string, next_character);
        token t(token_id, token_symbol, i + 1);
        tokens.push_back(t);
    }
    return true;
}

/**
 * Classifies the character `cur_string` (with one character of lookahead).
 *
 * @return "simple", "simple_string", "id", "start of string" or "comment";
 *         otherwise whatever incorrect() returns ("fine" for blanks and tabs,
 *         "error" for any other non-letter).
 */
string lexi::determiner(string cur_string, string next_character) {
    if(det_help_simple(cur_string, next_character)) {
        return "simple";
    }
    if(det_help_simple_string(cur_string, next_character)) {
        return "simple_string";
    }
    if(det_help_id(cur_string)) {
        return "id";
    }
    if(cur_string == "'") {
        return "start of string";
    }
    if(cur_string == "#") {
        return "comment";
    }
    return incorrect(cur_string);
}

/** @return true when the first character of `cur_string` is an ASCII letter. */
bool lexi::det_help_id(string cur_string) {
    const char c = cur_string[0];
    return ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
}

/** @return true when `a` is "S" and `b` is "c" (the first two letters of "Schemes"). */
bool lexi::quick_help(string a, string b) {
    return a == "S" && b == "c";
}

/**
 * @return true when the two characters are "Sc", "Qu", "Ru" or "Fa", i.e. the
 *         text may be the start of Schemes, Queries, Rules or Facts.
 */
bool lexi::det_help_simple_string(string cur_string, string next_character) {
    return quick_help(cur_string, next_character)
        || (cur_string == "Q" && next_character == "u")
        || (cur_string == "R" && next_character == "u")
        || (cur_string == "F" && next_character == "a");
}

/** @return true when `cur_string` is one of the punctuation symbols , . ? ( ) : */
bool lexi::det_help_simple(string cur_string, string next_character) {
    return cur_string == "," || cur_string == "." || cur_string == "?"
        || cur_string == "(" || cur_string == ")" || cur_string == ":";
}

/**
 * Classifies a character that matched none of the token categories.
 *
 * @return "fine" for a blank or tab, "error" for any other non-letter,
 *         " " for a letter.
 */
string lexi::incorrect(string cur_string) {
    if(cur_string == " " || cur_string == "\t") {
        return "fine";
    }
    const char c = cur_string[0];
    const bool is_letter = ('A' <= c && c <= 'Z') || ('a' <= c && c <= 'z');
    if(!is_letter) {
        return "error";
    }
    return " ";
}

/**
 * Consumes the run of alphanumeric characters that starts at column `b` of
 * line `a`, replacing it in `data` with a single blank.
 *
 * @return the consumed text, or " " when column `b` lies outside the line.
 */
string lexi::id_finder(vector<string> & data, int a, int b) {
    string lexeme;
    for(unsigned int j = b; j < data[a].size(); j++) {
        lexeme += data[a].at(j);
        const bool at_line_end = !(j < data[a].size() - 1);
        // The identifier stops at the end of the line or just before the
        // first delimiter.
        if(at_line_end || is_char_valid(data[a].at(j + 1))) {
            data[a].replace(data[a].begin() + b, data[a].begin() + j + 1, " ");
            return lexeme;
        }
    }
    return " ";
}

/**
 * Consumes the comment that starts at column `b` of line `i`: everything from
 * that column to the end of the line is replaced in `data` by a single blank.
 *
 * @return the comment text, or "error" when column `b` lies outside the line.
 */
string lexi::comment_finder(vector<string> & data, int i, int b) {
    if(b < 0 || static_cast<string::size_type>(b) >= data[i].size()) {
        return "error";
    }
    const string comment = data[i].substr(b);
    data[i].replace(data[i].begin() + b, data[i].end(), " ");
    return comment;
}

/**
 * Consumes the quoted string whose opening quote is the first `'` at or after
 * column `b` of line `a`. The whole literal, including both quotes, is
 * replaced in `data` by two blanks.
 *
 * @return the opening quote followed by the string's contents (the caller
 *         strips the quote), or "error" when there is no opening quote or the
 *         string is not closed on the same line.
 */
string lexi::string_finder(vector<string> & data, int a, int b) {
    const string::size_type search_from = (b > 0) ? static_cast<string::size_type>(b) : 0;
    const string::size_type open_quote = data[a].find('\'', search_from);
    if(open_quote == string::npos) {
        return "error";
    }
    const string::size_type close_quote = data[a].find('\'', open_quote + 1);
    if(close_quote == string::npos) {
        return "error";
    }
    const string lexeme = data[a].substr(open_quote, close_quote - open_quote);
    data[a].replace(open_quote, close_quote - open_quote + 1, "  ");
    return lexeme;
}

/**
 * Maps a punctuation symbol to its token type name.
 *
 * @return COMMA, PERIOD, Q_MARK, LEFT_PAREN, RIGHT_PAREN, COLON, or
 *         COLON_DASH for ':' followed by '-'; "" for anything else.
 */
string lexi::type_simple_caps(string symbol, string next_symbol) {
    if(symbol == ",") {
        return "COMMA";
    }
    if(symbol == ".") {
        return "PERIOD";
    }
    if(symbol == "?") {
        return "Q_MARK";
    }
    if(symbol == "(") {
        return "LEFT_PAREN";
    }
    if(symbol == ")") {
        return "RIGHT_PAREN";
    }
    if(symbol == ":") {
        return (next_symbol == "-") ? "COLON_DASH" : "COLON";
    }
    return "";
}

/**
 * Returns the text of the punctuation token that starts with `symbol`.
 *
 * @return ":-" for ':' followed by '-', the symbol itself for , . ? ( ) :
 *         and "" for anything else.
 */
string lexi::type_simple(string symbol, string next_symbol) {
    if(symbol == ":" && next_symbol == "-") {
        return ":-";
    }
    if(symbol == "," || symbol == "." || symbol == "?"
        || symbol == "(" || symbol == ")" || symbol == ":") {
        return symbol;
    }
    return "";
}

/**
 * Tries to consume a keyword (see is_simple_string) that starts at column `b`
 * of line `i`. The keyword must be preceded by a delimiter (or start the
 * line) and be followed by a delimiter (or end the line). On success the
 * keyword is replaced in `data` by a single blank.
 *
 * @return the keyword, or "wrong" when no complete keyword starts at `b`
 *         (in which case `data` is left untouched).
 */
string lexi::det_type_simple_string(vector<string> & data, int i, int b) {
    // The preceding character does not change while scanning, so test it once.
    const bool delimiter_before = (b <= 0) || is_char_valid(data[i].at(b - 1));
    if(!delimiter_before) {
        return "wrong";
    }

    string candidate;
    for(unsigned int j = b; j < data[i].size(); j++) {
        candidate += data[i].at(j);
        // '!' stands in for "end of line"; it counts as a delimiter.
        const char following = (j < data[i].size() - 1) ? data[i].at(j + 1) : '!';
        if(is_simple_string(candidate) && is_char_valid(following)) {
            data[i].replace(data[i].begin() + b, data[i].begin() + j + 1, " ");
            return candidate;
        }
    }
    return "wrong";
}

/**
 * Despite its name, this reports whether a character is a delimiter.
 *
 * @return true when `next_character` is NOT an ASCII letter or digit.
 */
bool lexi::is_char_valid(char next_character) {
    const bool is_alphanumeric =
        ('A' <= next_character && next_character <= 'Z') ||
        ('a' <= next_character && next_character <= 'z') ||
        ('0' <= next_character && next_character <= '9');
    return !is_alphanumeric;
}

/** @return true when `simple_com` is exactly Schemes, Facts, Rules or Queries. */
bool lexi::is_simple_string(string simple_com) {
    return simple_com == "Schemes" || simple_com == "Facts"
        || simple_com == "Rules" || simple_com == "Queries";
}

/**
 * Maps a keyword to its token type name.
 *
 * @return SCHEMES, FACTS, RULES or QUERIES; "" for anything else.
 */
string lexi::type_simple_string(string simple_com) {
    if(simple_com == "Schemes") {
        return "SCHEMES";
    }
    if(simple_com == "Facts") {
        return "FACTS";
    }
    if(simple_com == "Rules") {
        return "RULES";
    }
    if(simple_com == "Queries") {
        return "QUERIES";
    }
    return "";
}