Add Prolog project

This commit is contained in:
Davide Depau 2022-12-05 22:05:23 +01:00
commit aab33ea244
6 changed files with 679 additions and 0 deletions

3
.gitmodules vendored Normal file
View file

@ -0,0 +1,3 @@
[submodule "JSONTestSuite"]
path = JSONTestSuite
url = https://github.com/nst/JSONTestSuite.git

1
JSONTestSuite Submodule

@ -0,0 +1 @@
Subproject commit d64aefb55228d9584d3e5b2433f720ea8fd00c82

561
Prolog/jsonparse.pl Normal file
View file

@ -0,0 +1,561 @@
% Whitespace helpers
%
% is_whitespace(?Char)
%   Char is one of the four characters treated as insignificant whitespace
%   by this parser: space, line feed, horizontal tab, carriage return.
is_whitespace(' ').
is_whitespace('\n').
is_whitespace('\t').
is_whitespace('\r').

% strip_leading_whitespace(+Chars, -Stripped)
%   Stripped is Chars with its leading run of whitespace characters removed.
%   A list that does not start with whitespace (including []) is returned
%   unchanged.
strip_leading_whitespace(Chars, Stripped) :-
    (   Chars = [Head | Tail],
        is_whitespace(Head)
    ->  strip_leading_whitespace(Tail, Stripped)
    ;   Stripped = Chars
    ).
% Indentation helpers
%
% indent(+Level, +In, -Out)
%   Same as indent/4 with the position fixed to `first`.
indent(Level, In, Out) :-
    indent(Level, In, Out, first).

% indent(+Level, +In, -Out, +Position)
%   Out is In preceded by a line break and Level spaces.
%   At level 0 nothing is added for Position = first, and only the line
%   break is added for Position = nth. Negative levels fail.
indent(Level, In, Out, Position) :-
    (   Level = 0, Position = first, Out = In
    ->  true
    ;   Level = 0, Position = nth, Out = ['\n' | In]
    ->  true
    ;   Out = ['\n' | Padded],
        Level > 0,
        do_indent(Level, In, Padded)
    ).

% do_indent(+Level, +In, -Out)
%   Out is In preceded by exactly Level space characters.
do_indent(Level, In, Out) :-
    (   Level = 0, Out = In
    ->  true
    ;   Out = [' ' | Rest],
        Level > 0,
        Remaining is Level - 1,
        do_indent(Remaining, In, Rest)
    ).
% Helpers for serializing JSON comma-separated sequences with indentation
%
% json_comma_separated_list(-Chars, +IndentLevel, +IndentIncrement,
%                           +Position, +Items, :ItemSerializer)
%   Chars is the character list for Items joined by commas, without the
%   surrounding brackets/braces.
%   - IndentLevel is the number of spaces placed in front of every item.
%   - IndentIncrement is the indentation step; 0 produces compact output.
%   - Position is `first` for the initial call and `nth` afterwards.
%   - ItemSerializer is called as
%     call(ItemSerializer, ItemChars, Item, IndentLevel, IndentIncrement).
%
% separator(+Position, -SeparatorChars)
separator(first, []).    % No separator before first item
separator(nth, [',']).   % Separator before all but first item

% Handle empty list/object
json_comma_separated_list([], _, _, first, [], _) :- !.
% Handle exactly one item: it is kept on the same line as the brackets
json_comma_separated_list(Chars, IndentLevel, IndentIncrement, first, [Item], ItemSerializer) :-
    !,
    call(ItemSerializer, Chars, Item, IndentLevel, IndentIncrement).
% End of the sequence in compact mode: nothing to emit
json_comma_separated_list([], _, 0, _, [], _) :- !.
% End of the sequence: line break plus the indentation of the closing
% bracket, which sits one step to the left of the items
json_comma_separated_list(Chars, IndentLevel, IndentIncrement, nth, [], _) :-
    !,
    ClosingLevel is IndentLevel - IndentIncrement,
    indent(ClosingLevel, [], Chars, nth).
% Handle non-empty list/object
json_comma_separated_list(Chars, IndentLevel, IndentIncrement, Position, [Item | Items], ItemSerializer) :-
    !,
    separator(Position, Separator),
    % The item sits at IndentLevel, so that is the level handed to its
    % serializer (the same value the single-item clause passes). Passing
    % IndentLevel + IndentIncrement here would make nested containers add
    % the increment twice.
    call(ItemSerializer, ItemChars, Item, IndentLevel, IndentIncrement),
    indent(IndentLevel, ItemChars, IndentedChars),
    json_comma_separated_list(RestChars, IndentLevel, IndentIncrement, nth, Items, ItemSerializer),
    concat(Separator, IndentedChars, Prefix),
    concat(Prefix, RestChars, Chars).
% String append/prepend utils
%
% concat(+Front, +Back, -Out)
%   Out is the concatenation of Front and Back. Either argument may be a
%   single atom instead of a list, in which case it is treated as a
%   one-element list.
concat(Front, Back, Out) :-
    (   atom(Front)
    ->  concat([Front], Back, Out)
    ;   atom(Back)
    ->  concat(Front, [Back], Out)
    ;   Front = [], Out = Back
    ->  true
    ;   Front = [Head | Tail],
        Out = [Head | Rest],
        concat(Tail, Back, Rest)
    ).

% wrap(+WrapChar, +List, -Wrapped)
%   Wrapped is List with WrapChar added at both ends.
wrap(WrapChar, List, Wrapped) :-
    concat(WrapChar, List, WithPrefix),
    concat(WithPrefix, WrapChar, Wrapped).
% Key-value pair list helpers
%
% kvplist_getitem(?Key, +KVPs, ?Value)
%   KVPs is a list of (Key, Value) pairs. Succeeds once for every pair whose
%   key unifies with Key, in list order, unifying Value with its value.
kvplist_getitem(Key, KVPs, Value) :-
    member((Key, Value), KVPs).
% jsonaccess/3: jsonaccess(JsonObj, Fields, Result).
%   Result is the value found by following Fields inside JsonObj.
%   Fields is either a list of keys or a single key. A key is an integer
%   (zero-based index into a jsonarray/1) or a string/atom (member name of
%   a jsonobj/1). Only the first matching value is returned; the predicate
%   fails when the path cannot be followed.

% Empty JSON path, computation is complete
jsonaccess(JsonObj, [], JsonObj) :- !.
% Handle accessing a JSON array by index.
% Only integers are accepted: nth0/3 raises a type error for any other
% number, whereas an unsupported key is meant to make the lookup fail.
jsonaccess(jsonarray(JArr), [F | Fs], Result) :-
    integer(F),
    !,
    nth0(F, JArr, Item),
    jsonaccess(Item, Fs, Result),
    !.
% Handle accessing a JSON object by key, key is a string
jsonaccess(jsonobj(KVPList), [F | Fs], Result) :-
    string(F),
    !,
    kvplist_getitem(F, KVPList, Item),
    jsonaccess(Item, Fs, Result),
    !.
% Handle accessing a JSON object by key, key is an atom
jsonaccess(jsonobj(KVPList), [F | Fs], Result) :-
    atom(F),
    !,
    atom_string(F, String),
    jsonaccess(jsonobj(KVPList), [String | Fs], Result).
% Forbid other types of keys: a list that reached this clause starts with a
% key none of the clauses above could handle
jsonaccess(_, Fields, _) :-
    is_list(Fields),
    !,
    fail.
% Thanks to the cut above, only a non-list gets here: a single key was
% given directly, so wrap it into the general field path list.
jsonaccess(JsonObject, Field, Result) :-
    jsonaccess(JsonObject, [Field], Result).
% jsonread/2: jsonread(Filename, JsonObj).
%   Reads the whole file Filename and parses its content with jsonparse/2.
%   Fails if the content is not accepted by the parser; errors raised by
%   open/3 or read_string/3 are propagated.
jsonread(Filename, JsonObj) :-
    % setup_call_cleanup/3 closes the stream even if reading throws
    setup_call_cleanup(
        open(Filename, read, Stream),
        read_string(Stream, _, String),
        close(Stream)),
    jsonparse(String, JsonObj).
% jsondump/2: jsondump(JsonObj, Filename).
%   Serializes JsonObj with jsonparse/2 and writes the text to Filename,
%   replacing any previous content.
jsondump(JsonObj, Filename) :-
    % Serialize first: if JsonObj cannot be serialized the target file is
    % neither created nor truncated.
    jsonparse(String, JsonObj),
    % setup_call_cleanup/3 closes the stream even if writing throws
    setup_call_cleanup(
        open(Filename, write, Stream),
        write(Stream, String),
        close(Stream)).
% jsonparse/3: jsonparse(JsonString, JSON, IndentIncrement).
%   Serialize JSON to the string JsonString, indenting nested levels by
%   IndentIncrement spaces.
jsonparse(JsonString, JSON, IndentIncrement) :-
    marshal_json(Chars, JSON, 0, IndentIncrement),
    string_chars(JsonString, Chars).

% jsonparse/2: jsonparse(JsonString, JsonObj).
%   Works in both directions depending on what JsonString is:
%   - unbound: JSON is serialized into JsonString with an indentation of 2;
%   - atom or string: converted to a list of chars and parsed;
%   - list starting with a number: taken as a code list, converted to a
%     string and parsed;
%   - any other non-empty list: taken as a list of chars and parsed; only
%     whitespace may follow the JSON value.
jsonparse(JsonString, JSON) :-
    (   var(JsonString),
        functor(JSON, _, _)     % raises if JSON is unbound as well
    ->  jsonparse(JsonString, JSON, 2)
    ;   atom(JsonString)
    ->  atom_chars(JsonString, AtomChars),
        jsonparse(AtomChars, JSON)
    ;   string(JsonString)
    ->  string_chars(JsonString, StringChars),
        jsonparse(StringChars, JSON)
    ;   JsonString = [First | _],
        number(First)
    ->  string_codes(String, JsonString),
        jsonparse(String, JSON)
    ;   JsonString = [_ | _]
    ->  unmarshal_json(JsonString, JSON, Leftover),
        ensure_only_whitespace_left(Leftover)
    ).
% Eliminate leftover whitespace at the end of a JSON doc
%
% ensure_only_whitespace_left(+Chars)
%   True when every element of the list Chars is a whitespace character
%   (an empty list qualifies).
ensure_only_whitespace_left(Chars) :-
    maplist(is_whitespace, Chars).
% unmarshal_json(+Chars, -JSON, -Remaining)
%   Parses one JSON value from the front of Chars. JSON is the parsed value
%   and Remaining the input left after it. The kind of value is selected
%   from the first character(s); leading whitespace is skipped.
unmarshal_json(Chars, JSON, Remaining) :-
    (   % Handle bool/null literals
        Chars = ['t', 'r', 'u', 'e' | Rest], JSON = true, Remaining = Rest
    ->  true
    ;   Chars = ['f', 'a', 'l', 's', 'e' | Rest], JSON = false, Remaining = Rest
    ->  true
    ;   Chars = ['n', 'u', 'l', 'l' | Rest], JSON = null, Remaining = Rest
    ->  true
    ;   % Strip spaces
        Chars = [C | Rest], is_whitespace(C)
    ->  unmarshal_json(Rest, JSON, Remaining)
    ;   % Handle numbers: the number parser needs the first digit or the
        % minus sign, so it receives the whole input
        Chars = [C | _], is_digit(C)
    ->  unmarshal_jsonnumber(Chars, JSON, Remaining)
    ;   Chars = ['-' | _]
    ->  unmarshal_jsonnumber(Chars, JSON, Remaining)
    ;   % Handle strings
        Chars = ['"' | Rest]
    ->  unmarshal_jsonstring(Rest, JSON, Remaining)
    ;   % Handle objects
        Chars = ['{' | Rest]
    ->  unmarshal_jsonobject(Rest, JSON, Remaining)
    ;   % Handle arrays
        Chars = ['[' | Rest]
    ->  unmarshal_jsonarray(Rest, JSON, Remaining)
    ).
%
% Parse JSON strings
%
% unmarshal_jsonstring(+Chars, -JSON, -Remaining)
%   Chars is the input right after the opening double quote. JSON is the
%   string content and Remaining the input after the closing double quote.
%   Fails on an unterminated string, a raw control character or an invalid
%   escape sequence.
unmarshal_jsonstring(Cs, JSON, Remaining) :-
    do_unmarshal_jsonstring(Cs, regular, Chars, Remaining),
    string_chars(JSON, Chars).

% do_unmarshal_jsonstring(+Input, +Mode, -Output, -Remaining)
%   Mode is `regular`, or `escape` when the previous character was a
%   backslash.
% Closing quote: the string is complete
do_unmarshal_jsonstring(['"' | Cs], regular, [], Cs) :- !.
% Escape mode enter
do_unmarshal_jsonstring(['\\' | Cs], regular, Output, Rem) :-
    !,
    do_unmarshal_jsonstring(Cs, escape, Output, Rem).
% Regular character
do_unmarshal_jsonstring([C | Cs], regular, [C | Output], Rem) :-
    char_code(C, Code),
    Code > 0x1f, % Do not allow control characters
    !,
    do_unmarshal_jsonstring(Cs, regular, Output, Rem).
% Escape handling: single-character escapes
do_unmarshal_jsonstring([Escape | Cs], escape, [Char | Output], Rem) :-
    json_simple_escape(Escape, Char),
    !,
    do_unmarshal_jsonstring(Cs, regular, Output, Rem).
% Unicode characters are not handled as per project specs, so the handling
% of \uXXXX is considered implementation-defined: the backslash is escaped
% and the rest of the sequence is left as-is. The sequence must still be
% well-formed, i.e. `u` followed by exactly four hexadecimal digits,
% otherwise the string is rejected.
do_unmarshal_jsonstring(['u', H1, H2, H3, H4 | Cs], escape,
                        ['\\', 'u', H1, H2, H3, H4 | Output], Rem) :-
    maplist(json_hex_digit, [H1, H2, H3, H4]),
    !,
    do_unmarshal_jsonstring(Cs, regular, Output, Rem).

% json_simple_escape(?EscapeLetter, ?Char)
%   Char is the character denoted by a backslash followed by EscapeLetter.
json_simple_escape('\\', '\\').
json_simple_escape('"', '"').
json_simple_escape('n', '\n').
json_simple_escape('/', '/').
json_simple_escape('f', '\f').
json_simple_escape('r', '\r').
json_simple_escape('t', '\t').
json_simple_escape('b', '\b').

% json_hex_digit(+Char): Char is a hexadecimal digit.
json_hex_digit(Char) :-
    char_type(Char, xdigit(_)).
%
% unmarshal JSON numbers
%
% unmarshal_jsonnumber(+Chars, -JSON, -Remaining)
%   Chars starts with a digit or with '-'. The characters forming the number
%   are collected by do_unmarshal_jsonnumber/4, checked by
%   ensure_not_zero_prefix/2 and converted with number_chars/2.
%   Remaining is the input left after the number.
unmarshal_jsonnumber([C | Cs], JSON, Remaining) :-
is_digit(C),
!,
do_unmarshal_jsonnumber([C | Cs], empty, Chars, Remaining),
ensure_not_zero_prefix(Chars, CheckedChars),
number_chars(JSON, CheckedChars).
% Parse negative numbers as positive, then add the minus sign back
unmarshal_jsonnumber(['-' | Cs], JSON, Remaining) :-
!,
do_unmarshal_jsonnumber(Cs, empty, Chars, Remaining),
ensure_not_zero_prefix(['-' | Chars], CheckedChars),
number_chars(JSON, CheckedChars).
% ensure_not_zero_prefix(+Chars, -CheckedChars)
%   Fails when a leading '0' (optionally preceded by '-') is directly
%   followed by another digit; otherwise CheckedChars is Chars.
ensure_not_zero_prefix(['-', '0', C | _], _) :-
is_digit(C),
!,
fail.
ensure_not_zero_prefix(['0', C | _], _) :-
is_digit(C),
!,
fail.
ensure_not_zero_prefix(Cs, Cs) :- !.
% do_unmarshal_jsonnumber(+Chars, +Mode, -NumChars, -Remaining)
%   Finite-state machine over the input. Modes used by the clauses below:
%     empty                     nothing consumed yet
%     leading_zero              the number started with '0'
%     integer                   inside the integer part
%     incomplete_fraction       '.' seen, no fraction digit yet
%     fraction                  inside the fraction digits
%     incomplete_exponent       'e'/'E' seen, no sign or digit yet
%     incomplete_exponent_sign  exponent sign seen, no digit yet
%     exponent                  inside the exponent digits
%   The number may only end in integer, fraction, exponent or leading_zero
%   mode (see the last four clauses); in every other mode the end of the
%   number makes the parse fail. Clause order matters.
% Allow leading zero only if it's a float or the only digit
do_unmarshal_jsonnumber(['0' | Cs], empty, ['0' | Num], Remaining) :-
!,
do_unmarshal_jsonnumber(Cs, leading_zero, Num, Remaining).
do_unmarshal_jsonnumber(['0' | _], leading_zero, _, _) :-
!,
fail.
% Ensure at least one digit has been input before allowing exponents or fractions
% (mode switch only: the call is repeated with the same input and output, so
% the digit itself is consumed by the generic digit clause further down)
do_unmarshal_jsonnumber([C | Cs], empty, [C | Num], Remaining) :-
is_digit(C),
!,
do_unmarshal_jsonnumber([C | Cs], integer, [C | Num], Remaining).
% Handle exponent
% First exponent digit, with or without a sign before it: switch to exponent
% mode without consuming the digit
do_unmarshal_jsonnumber([C | Cs], incomplete_exponent, [C | Num], Remaining) :-
is_digit(C),
!,
do_unmarshal_jsonnumber([C | Cs], exponent, [C | Num], Remaining).
do_unmarshal_jsonnumber([C | Cs], incomplete_exponent_sign, [C | Num], Remaining) :-
is_digit(C),
!,
do_unmarshal_jsonnumber([C | Cs], exponent, [C | Num], Remaining).
% Exponent marker: only accepted after an integer part or a fraction
do_unmarshal_jsonnumber([C | Cs], Mode, [C | Num], Remaining) :-
(C == 'e' ; C == 'E'),
(Mode == integer ; Mode == fraction ; Mode == leading_zero),
!,
do_unmarshal_jsonnumber(Cs, incomplete_exponent, Num, Remaining).
% Optional exponent sign, only directly after the exponent marker
do_unmarshal_jsonnumber(['+' | Cs], incomplete_exponent, ['+' | Num], Remaining) :-
!,
do_unmarshal_jsonnumber(Cs, incomplete_exponent_sign, Num, Remaining).
do_unmarshal_jsonnumber(['-' | Cs], incomplete_exponent, ['-' | Num], Remaining) :-
!,
do_unmarshal_jsonnumber(Cs, incomplete_exponent_sign, Num, Remaining).
% Parse fractions
% The decimal point is only accepted directly after the integer part
do_unmarshal_jsonnumber(['.' | Cs], Mode, ['.' | Num], Remaining) :-
(Mode = integer ; Mode = leading_zero),
!,
do_unmarshal_jsonnumber(Cs, incomplete_fraction, Num, Remaining).
% First fraction digit: switch to fraction mode without consuming the digit
do_unmarshal_jsonnumber([C | Cs], incomplete_fraction, [C | Num], Remaining) :-
is_digit(C),
!,
do_unmarshal_jsonnumber([C | Cs], fraction, [C | Num], Remaining).
% Parse regular digits without mode FSM changes
do_unmarshal_jsonnumber([C | Cs], Mode, [C | Num], Remaining) :-
is_digit(C),
!,
do_unmarshal_jsonnumber(Cs, Mode, Num, Remaining).
% End of the number: the first character that no clause above accepted is
% left in Remaining. Only these four modes may end a number.
do_unmarshal_jsonnumber(Cs, integer, [], Cs) :- !.
do_unmarshal_jsonnumber(Cs, fraction, [], Cs) :- !.
do_unmarshal_jsonnumber(Cs, exponent, [], Cs) :- !.
do_unmarshal_jsonnumber(Cs, leading_zero, [], Cs) :- !.
%
% unmarshal JSON objects
%
% unmarshal_jsonobject(+Chars, -JSON, -Remaining)
%   Chars is the input right after the opening '{'. JSON is
%   jsonobj(KVPList), where KVPList holds one (Key, Value) pair per member
%   in input order, and Remaining is the input after the closing '}'.
unmarshal_jsonobject(Cs, jsonobj(KVPList), Remaining) :-
    do_unmarshal_jsonobject(Cs, can_leave, comma_not_allowed, KVPList, Remaining).

% do_unmarshal_jsonobject(+Chars, +CanLeave, +CommaState, -KVPList, -Remaining)
%   CanLeave is can_leave/cannot_leave: whether '}' may come next (it may
%   not directly after a comma).
%   CommaState is comma_allowed right after a member and comma_not_allowed
%   at the start of the object and right after a comma.
% Done parsing
do_unmarshal_jsonobject(['}' | Cs], can_leave, _, [], Cs) :- !.
% Strip whitespace
do_unmarshal_jsonobject([C | Cs], CanLeave, CommaAllowed, KVPList, Remaining) :-
    is_whitespace(C),
    !,
    do_unmarshal_jsonobject(Cs, CanLeave, CommaAllowed, KVPList, Remaining).
% Handle comma
do_unmarshal_jsonobject([',' | Cs], can_leave, comma_allowed, KVPList, Remaining) :-
    !,
    do_unmarshal_jsonobject(Cs, cannot_leave, comma_not_allowed, KVPList, Remaining).
% Handle beginning of key-value pair.
% A member may only start where a comma is not expected, i.e. at the
% beginning of the object or right after a comma; this rejects input such
% as {"a":1 "b":2} where the separating comma is missing.
do_unmarshal_jsonobject(['"' | Cs], _, comma_not_allowed, [KVP | KVPList], NewRemaining) :-
    !,
    unmarshal_jsonobject_kvp(Cs, KVP, Remaining),
    do_unmarshal_jsonobject(Remaining, can_leave, comma_allowed, KVPList, NewRemaining).

% strip_leading_char(+Chars, ?Char, -Rest): Chars starts with Char.
strip_leading_char([C | Cs], C, Cs).

% Handle KVP
% unmarshal_jsonobject_kvp(+Chars, -(Key, Value), -Remaining)
%   Chars is the input right after the opening double quote of the key.
%   Parses the key string, the ':' separator (with optional whitespace on
%   both sides) and the value.
unmarshal_jsonobject_kvp(Cs, (Key, Value), Rem5) :-
    unmarshal_jsonstring(Cs, Key, Rem1),
    strip_leading_whitespace(Rem1, Rem2),
    strip_leading_char(Rem2, ':', Rem3),
    strip_leading_whitespace(Rem3, Rem4),
    unmarshal_json(Rem4, Value, Rem5).
%
% unmarshal JSON arrays
%
% unmarshal_jsonarray(+Chars, -JSON, -Remaining)
%   Chars is the input right after the opening '['. JSON is
%   jsonarray(Items) and Remaining the input after the closing ']'.
unmarshal_jsonarray(Chars, jsonarray(Items), Remaining) :-
    do_unmarshal_jsonarray(Chars, can_leave, comma_not_allowed, Items, Remaining).

% do_unmarshal_jsonarray(+Chars, +CanLeave, +CommaState, -Items, -Remaining)
%   CanLeave is can_leave/cannot_leave: whether ']' may come next (it may
%   not directly after a comma).
%   CommaState is comma_required right after an item and comma_not_allowed
%   at the start of the array and right after a comma.
do_unmarshal_jsonarray(Chars, CanLeave, CommaState, Items, Remaining) :-
    (   % Done parsing
        Chars = [']' | Rest],
        CanLeave = can_leave,
        Items = [],
        Remaining = Rest
    ->  true
    ;   % Strip whitespace
        Chars = [C | Rest],
        is_whitespace(C)
    ->  do_unmarshal_jsonarray(Rest, CanLeave, CommaState, Items, Remaining)
    ;   % Handle comma
        Chars = [',' | Rest],
        CanLeave = can_leave,
        CommaState = comma_required
    ->  do_unmarshal_jsonarray(Rest, cannot_leave, comma_not_allowed, Items, Remaining)
    ;   % Handle child
        CommaState = comma_not_allowed,
        Items = [Item | MoreItems]
    ->  unmarshal_json(Chars, Item, AfterItem),
        do_unmarshal_jsonarray(AfterItem, can_leave, comma_required, MoreItems, Remaining)
    ).
%
% Handle serializing JSON
%
% marshal_json(-Chars, +JSON, +IndentLevel, +IndentIncrement).
%   Chars is the list of characters of the JSON text for the term JSON.
%   JSON is one of: true, false, null, a string, a number,
%   jsonarray(Items) or jsonobj(KVPList).
%   IndentLevel is the indentation of the line the value starts on and
%   IndentIncrement the number of spaces added per nesting level
%   (0 produces compact output).
%
% Marshal bool/null literals
%
marshal_json(Chars, true, _, _) :-
    !,
    string_chars("true", Chars).
marshal_json(Chars, false, _, _) :-
    !,
    string_chars("false", Chars).
marshal_json(Chars, null, _, _) :-
    !,
    string_chars("null", Chars).
%
% Marshal string literals
%
marshal_json(Chars, JSON, _, _) :-
    string(JSON),
    !,
    string_chars(JSON, JStrChars),
    escape_jsonstring(JStrChars, EscapedChars),
    wrap('"', EscapedChars, Chars).
%
% Marshal number literals
%
% Numbers are written the way number_chars/2 prints them
marshal_json(Chars, JSON, _, _) :-
    number(JSON),
    !,
    number_chars(JSON, Chars).
%
% Marshal JSON arrays
%
marshal_json(Chars, jsonarray(Items), IndentLevel, IndentIncrement) :-
    !,
    NewIndentLevel is IndentLevel + IndentIncrement,
    json_comma_separated_list(CSLChars, NewIndentLevel, IndentIncrement, first, Items, marshal_json),
    concat('[', CSLChars, Prepended),
    concat(Prepended, ']', Chars).
%
% Marshal JSON objects
%
% Members are indented one step deeper than the object itself, exactly like
% array items. Without this step a top-level object with two or more
% members cannot be serialized when IndentIncrement > 0: the level computed
% for the closing brace (IndentLevel - IndentIncrement) would be negative.
marshal_json(Chars, jsonobj(KVPList), IndentLevel, IndentIncrement) :-
    !,
    NewIndentLevel is IndentLevel + IndentIncrement,
    json_comma_separated_list(CSLChars, NewIndentLevel, IndentIncrement, first, KVPList, marshal_json_kvp),
    concat('{', CSLChars, Prepended),
    concat(Prepended, '}', Chars).
% escape_jsonstring(+Chars, -EscapedChars)
%   EscapedChars is Chars with every character that cannot appear verbatim
%   inside a JSON string replaced by its escape sequence.
escape_jsonstring([], []) :- !.
escape_jsonstring(['"' | Ins], ['\\', '"' | Outs]) :-
    !,
    escape_jsonstring(Ins, Outs).
escape_jsonstring(['\\' | Ins], ['\\', '\\' | Outs]) :-
    !,
    escape_jsonstring(Ins, Outs).
escape_jsonstring(['\b' | Ins], ['\\', 'b' | Outs]) :-
    !,
    escape_jsonstring(Ins, Outs).
escape_jsonstring(['\f' | Ins], ['\\', 'f' | Outs]) :-
    !,
    escape_jsonstring(Ins, Outs).
escape_jsonstring(['\n' | Ins], ['\\', 'n' | Outs]) :-
    !,
    escape_jsonstring(Ins, Outs).
escape_jsonstring(['\r' | Ins], ['\\', 'r' | Outs]) :-
    !,
    escape_jsonstring(Ins, Outs).
escape_jsonstring(['\t' | Ins], ['\\', 't' | Outs]) :-
    !,
    escape_jsonstring(Ins, Outs).
% Any other control character (code below 0x20) has no short escape and
% must not be written raw, so it is emitted as \u00XX.
escape_jsonstring([C | Ins], ['\\', 'u', '0', '0', High, Low | Outs]) :-
    atom(C),
    atom_length(C, 1),
    char_code(C, Code),
    Code < 0x20,
    !,
    HighNibble is Code >> 4,
    LowNibble is Code /\ 0x0f,
    escape_hex_digit(HighNibble, High),
    escape_hex_digit(LowNibble, Low),
    escape_jsonstring(Ins, Outs).
% Escape forward slash only in case of ambiguity with XML closing tag
escape_jsonstring(['<', '/' | Ins], ['<', '\\', '/' | Outs]) :-
    !,
    escape_jsonstring(Ins, Outs).
% Everything else is copied unchanged
escape_jsonstring([C | Ins], [C | Outs]) :-
    escape_jsonstring(Ins, Outs).

% escape_hex_digit(+Value, -Char)
%   Char is the lowercase hexadecimal digit for Value (0..15).
escape_hex_digit(Value, Char) :-
    (   Value < 10
    ->  Code is 0'0 + Value
    ;   Code is 0'a + Value - 10
    ),
    char_code(Char, Code).
% spacing(+IndentIncrement, -Spaces)
%   Spaces is the list of characters placed between the ':' of an object
%   member and its value: nothing in compact mode (increment 0), a single
%   space for any positive increment. Fails for a negative increment.
spacing(Increment, Spaces) :-
    (   Increment = 0, Spaces = []
    ->  true
    ;   Spaces = [' '],
        Increment > 0
    ).
% marshal_json_kvp(-Chars, +(Key, Value), +IndentLevel, +IndentIncrement)
%   Chars is the serialized object member: the quoted key, a colon, the
%   spacing chosen by spacing/2 and the serialized value. Key must be a
%   string, otherwise the predicate fails.
marshal_json_kvp(Chars, (Key, Value), IndentLevel, IndentIncrement) :-
    string(Key),
    !,
    spacing(IndentIncrement, Gap),
    marshal_json(KeyChars, Key, IndentLevel, IndentIncrement),
    marshal_json(ValueChars, Value, IndentLevel, IndentIncrement),
    concat(KeyChars, ':', KeyWithColon),
    concat(KeyWithColon, Gap, MemberPrefix),
    concat(MemberPrefix, ValueChars, Chars).

51
mktracecmd.py Executable file
View file

@ -0,0 +1,51 @@
import re
import sys
from typing import List
def find_functors(prolog_source) -> List[str]:
    """
    Find all functors in the given Prolog source code.

    Functors look like the following:
        functor_name(arg1, arg2, ..., argN).
        another_functor(arg1, arg2, ..., argN) :- stuff...

    Functors always start at the beginning of the line. All indented blocks are skipped.
    We make sure that the functor name is followed by '('.
    A name starts with a lowercase letter or underscore and may continue with
    letters, digits and underscores (e.g. ``nth0`` or ``myPred``).

    This function extracts only the names of the functors, not the arguments.
    Each name is returned once, in order of first appearance.
    """
    matches = re.findall(r"^[a-z_][A-Za-z0-9_]*(?=\()", prolog_source, re.MULTILINE)
    # Remove duplicates; dict keys keep insertion order, so the result is
    # deterministic across runs.
    return list(dict.fromkeys(matches))
def main():
    """
    1. Read the Prolog source file specified in the first command line argument
    2. Find all functors
    3. Output a trace command for each functor, like the following: "trace(functor1), trace(functor2), ..., trace(functorN)."

    Exits with status 1 when no file is given or when the file contains no
    functors (in which case no command is printed).
    """
    if len(sys.argv) < 2:
        print("Usage: mktracecmd.py <prolog source file>")
        sys.exit(1)

    # Read the Prolog source file
    with open(sys.argv[1], "r") as f:
        prolog_source = f.read()

    # Find all functors
    functors = find_functors(prolog_source)
    if not functors:
        # Joining an empty list would print the malformed goal "trace()."
        print("No functors found in " + sys.argv[1], file=sys.stderr)
        sys.exit(1)

    # Output a trace command for each functor
    print(", ".join("trace({})".format(name) for name in functors) + ".")


if __name__ == "__main__":
    main()

36
test_prolog.py Executable file
View file

@ -0,0 +1,36 @@
#!/usr/bin/env python3
"""
Given the following bidirectional Prolog predicate that can serialize and deserialize JSON:
jsonread(Filename, JsonObj)
This script takes a JSON file path as an argument, runs SWI-Prolog
loading the `jsonparse.pl` library and runs a goal that parses the specified JSON file.
The Python script must exit with 0 if the jsonread/2 predicate returned true, 1 otherwise.
"""
import sys
import subprocess
def run_prolog(json_file_path):
    """
    Run SWI-Prolog using subprocess loading the `jsonparse.pl` library and runs a goal that parses the specified JSON file.

    Returns True when swipl exits with status 0, False when it exits with a
    non-zero status.
    """
    # The path is embedded in a single-quoted Prolog atom: escape backslashes
    # and single quotes so that a path containing them still denotes the same
    # file and cannot terminate the atom early.
    quoted_path = json_file_path.replace("\\", "\\\\").replace("'", "\\'")
    goal = "jsonread('{}', _)".format(quoted_path)

    # Run SWI-Prolog using subprocess
    try:
        subprocess.run(
            ["swipl", "-s", "Prolog/jsonparse.pl", "-g", goal, "-t", "halt"],
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        return False
    return True
if __name__ == "__main__":
    # Exit status 0 when the JSON file given as first argument was parsed,
    # 1 otherwise.
    parsed_ok = run_prolog(sys.argv[1])
    sys.exit(0 if parsed_ok else 1)

27
word2normal.sh Executable file
View file

@ -0,0 +1,27 @@
#!/bin/bash
# Replace typographic Unicode punctuation with ASCII counterparts.
# Usage: ./word2normal.sh < input_file > output_file
#
# Reads standard input and writes the converted text to standard output
# using a single sed command.
#
# Each character gets its own expression rather than a bracket expression:
# when sed runs in a single-byte locale, a bracket expression holding
# multi-byte characters matches their individual bytes and can corrupt
# unrelated text.
#
# Characters that are invisible or easily lost in editors are spelled as
# UTF-8 byte escapes with bash $'...' quoting:
#   \xe2\x80\x98  U+2018 left single quotation mark
#   \xe2\x80\x99  U+2019 right single quotation mark
#   \xe2\x80\x9a  U+201A single low-9 quotation mark
#   \xc2\xa0      U+00A0 no-break space
# NOTE(review): the single-quote and space patterns carried no visible
# character when this script was reviewed; the set above is the presumed
# intent -- confirm against real input.
sed -e $'s/\xe2\x80\x98/\'/g' \
    -e $'s/\xe2\x80\x99/\'/g' \
    -e $'s/\xe2\x80\x9a/\'/g' \
    -e 's/“/"/g' \
    -e 's/”/"/g' \
    -e 's/„/"/g' \
    -e 's/«/"/g' \
    -e 's/»/"/g' \
    -e 's/…/.../g' \
    -e 's/–/-/g' \
    -e 's/—/-/g' \
    -e $'s/\xc2\xa0/ /g' \
    -e 's/•/*/g' \
    -e 's/·/*/g' \
    -e 's/×/*/g'