Comments (12)
OK, I figured that expr
was the recursive filter. But I get now that logExp is implicitly supported because and/or is in there.
So I guess ExpressionParser
will try one of the parsers in the OneOf
parser, then attempt to match with one of the operators, and then go again. So that is why the filter parser works for userName eq \"foo\" and age lt 42
.
It definitely helps!
from pidgin.
My current attempt looks like this:
using Pidgin;
using Scim.ScimFilters;
using static Pidgin.Parser;
using static Pidgin.Parser<char>;
namespace Scim.Pidgin;
public class ScimFilterParser
{
/// <summary>
/// A parser that parses and returns a single alpha character case insensitive.
/// </summary>
/// <remarks><code>
/// A-Z / a-z
/// </remarks>
private static Parser<char, char> Alpha { get; }
= CIOneOf('A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z');
/// <summary>
/// A parser that parses and returns a single alpha numerical character case insensitive.
/// </summary>
/// <remarks><code>
/// A-Z / a-z / 0-9
/// </code></remarks>
private static Parser<char, char> AlphaNum { get; }
= OneOf(Alpha, Digit);
/// <remarks><code>
/// DIGIT / "A" / "B" / "C" / "D" / "E" / "F"
/// </code></remarks>
private static Parser<char, char> HexDig { get; }
= OneOf(Digit).Or(OneOf('A', 'B', 'C', 'D', 'E', 'F'));
/// <remarks><code>
/// pchar = unreserved / pct-encoded / sub-delims / ":" / "@"
/// pct-encoded = "%" HEXDIG HEXDIG
/// unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~"
/// sub-delims = "!" / "$" / "&" / "'" / "(" / ")" / "*" / "+" / "," / ";" / "="
/// </code></remarks>
private static Parser<char, char> PChar { get; }
= OneOf(
AlphaNum, // unreserved
OneOf('-', '.', '_', '~', // unreserved
'!', '$', '&', '\'', '(', ')', '*', '+', ',', ';', '=', // sub-delims
':', '@'),
Map((_, a, b) => (char)Convert.ToUInt32($"{a}{b}", fromBase: 16), Char('%'), HexDig, HexDig)); // pct-encoded
/// <summary>
/// A URN parser. Note, only supports <c>assigned-name</c> form.
/// </summary>
/// <remarks><code>
/// assigned-name = "urn" ":" NID ":" NSS
/// NID = (alphanum) 0*30(ldh) (alphanum)
/// ldh = alphanum / "-"
/// NSS = pchar *(pchar / "/")
/// </code></remarks>
public static Parser<char, string> Urn { get; }
= CreateUrn().Labelled("URN");
public static Parser<char, string> CreateUrn()
{
var start = CIString("urn").Before(Char(':'));
var nid = Map(
static (start, rest) => start + rest,
AlphaNum,
OneOf(AlphaNum, Char('-'))
.ManyString()
.Where(x => x.Length < 31 && x.Length > 0 && x[^1] != '-')
).Before(Char(':'));
var nss = Map(
static (start, rest) => start + rest,
PChar,
OneOf(PChar, Char('/')).ManyString());
return Map(
static (start, nid, nss) => $"{start}:{nid}:{nss}",
start,
nid,
nss);
}
/// <summary>
/// A nameChar parser, that parses a single character.
/// </summary>
/// <remarks><code>
/// "-" / "_" / DIGIT / ALPHA
/// </code></remarks>
public static Parser<char, char> NameChar { get; }
= OneOf(AlphaNum, Char('_'), Char('-'))
.Labelled("name character");
/// <summary>
/// An attribute name (ATTRNAME) parser.
/// </summary>
/// <remarks><code>
/// ALPHA *(nameChar)
/// </code></remarks>
public static Parser<char, string> AttrName { get; }
= Map(
static (first, rest) => first + rest,
Alpha,
NameChar.ManyString())
.Labelled("attribute name");
/// <summary>
/// A sub-attribute of a complex attribute (subAttr) parser.
/// </summary>
/// <remarks><code>
/// "." ATTRNAME
/// </code></remarks>
public static Parser<char, string> SubAttr { get; }
= Map(
static (period, subAttr) => period + subAttr,
Char('.'),
AttrName)
.Labelled("sub attribute");
private static Parser<char, string> AttrPathWithoutUrn
= Map(static (attr, subAttrs) => attr + subAttrs,
AttrName,
SubAttr.ManyString());
/// <remarks><code>
/// attrPath = [URI ":"] ATTRNAME *1subAttr
/// ; SCIM attribute name
/// ; URI is SCIM "schema" URI
/// </code></remarks>
public static Parser<char, AttrPath> AttrPath { get; }
= Try(
// Because the NSS part of URN also includes
// AttrPathWithoutUrn, it will consume the
// `ATTRNAME *1subAttr` part of attribute path.
// The MapWithInput gets the candidate attr path
// from the URN parse result and validates it using
// the AttrPathWithoutUrn parser.
Urn.MapWithInput<AttrPath?>((urn, _) =>
{
var index = urn.LastIndexOf(':');
var attrPath = urn[(index + 1)..];
return AttrPathWithoutUrn.Parse(attrPath).Success
? new AttrPath(urn[..index].ToString(), attrPath.ToString())
: null;
})
.Assert(x => x.HasValue)
.Select(x => x!.Value))
.Or(AttrPathWithoutUrn.Select(x => new AttrPath(x)))
.Labelled("attribute path");
/// <remarks><code>
/// compareOp = "eq" / "ne" / "co" /
/// "sw" / "ew" /
/// "gt" / "lt" /
/// "ge" / "le"
/// </code></remarks>
public static Parser<char, Operation> CompareOp { get; }
= OneOf(
// eq or ew
CIChar('e').Then(
CIChar('q').ThenReturn(Operation.Equal)
.Or(CIChar('w').ThenReturn(Operation.EndsWith))
),
CIString("ne").Select(_ => Operation.NotEqual),
// gt or ge
CIChar('g').Then(
CIChar('t').ThenReturn(Operation.GreaterThan)
.Or(CIChar('e').ThenReturn(Operation.GreaterThanOrEqual))
),
// lt or le
CIChar('l').Then(
CIChar('t').ThenReturn(Operation.LessThan)
.Or(CIChar('e').ThenReturn(Operation.LessThanOrEqual))
),
CIString("co").Select(_ => Operation.Contains),
CIString("sw").Select(_ => Operation.StartsWith),
CIString("pr").Select(_ => Operation.Present))
.Labelled("compare operation");
/// <remarks><code>
/// compValue = false / null / true / number / string
/// ; rules from JSON (RFC 7159)
/// </code></remarks>
public static Parser<char, Value> CompValue { get; }
= CreateCompValue().Labelled("compare value");
private static Parser<char, Value> CreateCompValue()
{
var booleans = CIString("false").Or(CIString("true"))
.Select(x => new Value(ValueKind.Boolean, x.ToLowerInvariant()));
var nulls = CIString("null")
.Select(_ => new Value(ValueKind.Null, null));
var quote = '"';
var strings = Token(c => c != quote).ManyString().Between(Char(quote))
.Select(x => new Value(ValueKind.String, x));
var numbers = CreateNumberParser();
return OneOf(numbers, strings, booleans, nulls);
}
/// <remarks><code>
/// number = [ minus ] int [ frac ] [ exp ]
/// decimal-point = %x2E ; .
/// digit1-9 = %x31-39 ; 1-9
/// e = %x65 / %x45 ; e E
/// exp = e [ minus / plus ] 1*DIGIT
/// frac = decimal-point 1*DIGIT
/// int = zero / ( digit1-9 *DIGIT )
/// minus = %x2D ; -
/// plus = %x2B ; +
/// zero = %x30 ; 0
/// </code></remarks>
private static Parser<char, Value> CreateNumberParser()
{
var isOneToNineDigit = Parser<char>.Token((char c) => c != '0' && char.IsDigit(c));
var nonLeadingZeroInteger = Map((x, rest) => x + rest, isOneToNineDigit, Digit.ManyString());
var plus = Char('+');
var minus = Char('-');
var decimalPoint = Char('.');
var zero = Char('0');
var singleZero = zero.Before(Not(Digit)).Select(_ => "0");
var integer = nonLeadingZeroInteger.Or(singleZero);
var frac = Map(
static (decimalPoint, digets) => decimalPoint + digets,
decimalPoint,
nonLeadingZeroInteger);
var exp = Map(
(e, plusMinus, num) => plusMinus.Match(pm => $"{e}{pm}{num}", () => e + num),
CIChar('E'),
plus.Or(minus).Optional(),
nonLeadingZeroInteger);
return Map(
static (minus, integer, frac, exp) =>
{
var kind = frac.HasValue || exp.HasValue ? ValueKind.Double : ValueKind.Integer;
var value = minus.Match<string?>(x => x + integer, () => integer);
value = frac.Match(x => value + x, () => value);
value = exp.Match(x => value + x, () => value);
return new Value(kind, value);
},
minus.Optional(),
integer,
frac.Optional(),
exp.Optional());
}
/// <remarks><code>
/// attrExp = (attrPath SP "pr") /
/// (attrPath SP compareOp SP compValue)
/// </code></remarks>
public static Parser<char, AttrExpression> AttrExp
= CreateAttrExp();
private static Parser<char, AttrExpression> CreateAttrExp()
{
var space = Char(' ');
var attrExp = Map(
static (path, opVal) => new AttrExpression(path, opVal.Operation, opVal.Value),
AttrPath.Before(space),
CompareOp.Bind(
op =>
{
return op == Operation.Present
? Not(space).Select(_ => Value.None)
: space.Then(CompValue);
},
(op, value) => (Operation: op, Value: value)));
return attrExp;
}
/// <remarks><code>
/// FILTER = attrExp / logExp / valuePath / *1"not" "(" FILTER ")"
/// </code></remarks>
public static Parser<char, FilterExpression> Filter
= CreateFilter();
private static Parser<char, FilterExpression> CreateFilter()
{
...
}
}
from pidgin.
Have a look in the Pidgin.Expression
namespace, that's where I put the stuff to help with recursive grammars.
In your case, I think the code would look something like (untested):
And = String("and")
.ThenReturn<Func<FilterExpression, FilterExpression, FilterExpression>>((x, y) => new AndExpression(x, y));
Or = String("or")
.ThenReturn<Func<FilterExpression, FilterExpression, FilterExpression>>((x, y) => new OrExpression(x, y));
Filter = ExpressionParser.Build(expr =>
OneOf( // parse a single term
AttrExp,
ValuePath,
String("not").Then(Parenthesised(expr)).Select(x => new NotExpression(x)),
Parenthesised(expr)
),
new[] { Operator.InfixL(And), Operator.InfixL(Or) } // define operators to combine terms
);
(I more or less copied this from the example, I know the docs for ExpressionParser
are somewhat lacking!) The BNF doesn't have anything to say about precedence or associativity, so I took a punt; if you need to change that you'd adjust the line marked // define operators
.
A couple of general pointers which you may already know:
- Recursive descent parsers (including parser combinators) don't natively support left recursion. (
ExpressionParser
gets around this by parsing a flat list of operators and then re-associating it in a second pass.) - In general, you can't translate an arbitrary BNF spec directly into parsing code 1:1 (no matter what sort of parsing tool you use). Your grammar is both left- and right-recursive, which is perfectly valid BNF but not very easy to parse out of the box (BNF defines production rules, not parsing rules).
- Parsing strategies tend to be good at either left recursion or right recursion (not both).
- Sometimes it Just Doesn't Work (as with left recursion for LL/recursive-descent parsers); sometimes it causes an asymptotic performance problem.
- Usually, when translating a BNF spec to code, people refactor the grammar so that it's either left- or right-recursive (depending on the type of parsing tool they're using).
- This usually involves defining a rule for a single term and a second rule for a list of such terms, just like in the parser above. See here for an explanation
- Parsing strategies tend to be good at either left recursion or right recursion (not both).
Hope this helps!
from pidgin.
Thank you very much for your reply. I am quite a newbie when it comes to parsers, so appreciate any input (its been 16 years since I had that compiler course at uni).
What I gather is that it should be possible with Pidgin, so what I will attempt is to do something much simpler like the Pidgin expression sample and try to understand what goes on with that. Your docs have been very useful so far, but if you have time at some point, just adding some inline comments in the expression sample would probably help quite a bit.
Anyway, my current status: Right now, I do have something that doesn't stack overflow (an improvement), but also doesn't work with the and
and or
cases:
private static Parser<char, FilterExpression> CreateFilter()
{
var and = String("and")
.ThenReturn<Func<FilterExpression, FilterExpression, FilterExpression>>(
static (left, right) => new AndExpression(left, right));
var or = String("or")
.ThenReturn<Func<FilterExpression, FilterExpression, FilterExpression>>(
static (left, right) => new OrExpression(left, right));
var filter = ExpressionParser.Build(expr =>
OneOf( // parse a single term
AttrExp.Cast<FilterExpression>(),
String("not").Then(Parenthesised(expr)).Select<FilterExpression>(static filter => new NotExpression(filter)),
Parenthesised(expr)
),
new[] { InfixL(and), InfixL(or) } // define operators to combine terms
);
return filter;
}
This works for "userName eq \"foo\""
, but not "userName eq \"foo\" and age lt 42"
. In the latter case, it just returns an AttrExpression
for the first part, and not an AndExpression
with both parts.
Note: I have purposely not ValuePath
part of the filter language yet, I want to get logExp
parts added in their first.
Ps. once I get these bits working and finish the rest of the SCIM API implementation, the code will be open sourced, for what its worth.
from pidgin.
Does AttrExp
consume trailing whitespace?
from pidgin.
I do not think AttrExp
consume trailing whitespace, no. It looks like this:
var space = Char(' ');
var attrExp = Map(
static (path, opVal) => new AttrExpression(path, opVal.Operation, opVal.Value),
AttrPath.Before(space),
CompareOp.Bind(
op =>
{
return op == Operation.Present
? Not(space).Select(_ => Value.None)
: space.Then(CompValue);
},
(op, value) => (Operation: op, Value: value)));
I guess it should not, since there might not be any trailing whitespace, and that would be legal.
from pidgin.
The input userName eq \"foo\" and age lt 42
has a whitespace character between the AttrExp
and the and
token, so that's why it's failing to recognise the and
. I generally suggest applying SkipWhitespaces
after each token
from pidgin.
Ahh of course. The spaces, SP
, are also specified in the ABNF, so they should be there.
If I change and
and or
to this it works:
var and = String("and").Between(SkipWhitespaces)
.ThenReturn<Func<FilterExpression, FilterExpression, FilterExpression>>(
static (left, right) => new AndExpression(left, right));
var or = String("or").Between(SkipWhitespaces)
.ThenReturn<Func<FilterExpression, FilterExpression, FilterExpression>>(
static (left, right) => new OrExpression(left, right));
But I am curious why. In the filter definition, AttrExp
comes first in the OneOf
parser, so I would think it tries that first, and that just matches on that. Why does it actually try the Parenthesised
(i'm guessing that is actually the logExp part of the abnf) if the AttrExp matches?
var filter = ExpressionParser.Build(expr =>
OneOf( // parse a single term
AttrExp.Cast<FilterExpression>(),
Parenthesised(expr), // logExp?
String("not").Then(Parenthesised(expr)).Select<FilterExpression>(static filter => new NotExpression(filter))
),
new[] { InfixL(and), InfixL(or) } // define operators to combine terms
);
from pidgin.
Sorry, this may have been unclear: expr
is a recursive reference to filter
. The grammar says a FILTER
can contain a parenthesised child FILTER
(optionally preceded by not
):
FILTER = ... / *1"not" "(" FILTER ")"
So that's what I was trying to do with the second two items in the OneOf
. logExp
doesn't have an exact analogue in my code, though I spose you could say that the // define operators
line does the job of logExp
.
Hope this helps!
from pidgin.
So I think I managed to get everything working.
This is the filter parts I ended up with, if anybody stumbles onto this issue in the future. Any suggestions for improvements are of course welcome.
/// <remarks><code>
/// FILTER = attrExp / logExp / valuePath / *1"not" "(" FILTER ")"
// </code></remarks>
public static Parser<char, FilterExpression> Filter
= CreateFilter();
private static Parser<char, T> Tok<T>(Parser<char, T> token)
=> Try(token).Before(SkipWhitespaces);
private static Parser<char, string> Tok(string token)
=> Tok(String(token));
private static Parser<char, T> Parenthesised<T>(Parser<char, T> parser)
=> parser.Between(Tok(Char('(')), Tok(Char(')')));
private static Parser<char, T> Brackets<T>(Parser<char, T> parser)
=> parser.Between(Tok(Char('[')), Tok(Char(']')));
private static Parser<char, FilterExpression> CreateFilter()
{
var and = Tok("and")
.ThenReturn<Func<FilterExpression, FilterExpression, FilterExpression>>(
static (left, right) => new AndExpression(left, right));
var or = Tok("or")
.ThenReturn<Func<FilterExpression, FilterExpression, FilterExpression>>(
static (left, right) => new OrExpression(left, right));
var filter = default(Parser<char, FilterExpression>);
filter = ExpressionParser.Build(expr =>
OneOf( // parse a single term
Tok(Map<char, AttrPath, FilterExpression, FilterExpression>(static (path, filter) => new GroupExpression(path, filter),
Tok(AttrPath),
Brackets(expr))), // valuePath
Tok("not")
.Then(Parenthesised(expr))
.Select<FilterExpression>(static filter => new NotExpression(filter)),
Tok(AttrExp.Cast<FilterExpression>()),
Parenthesised(expr)
),
new[] { InfixL(and), InfixL(or) } // and has higher precedence than or
);
return filter;
}
Thanks again @benjamin-hodgson.
Do you have a sponsor button or similar, that I can use to throw a little coin at you for all the help you have provided?
from pidgin.
Oh, that's very kind of you but won't be necessary! I have a full time job; I just do open source work for fun. Hopefully it never becomes time-consuming or stressful enough that I feel like I need to make money for it to be sustainable.
If you're keen to contribute, the best way you could help would be to pick up a bug or feature request from the issues board! For example, you might be interested in contributing some documentation improvements for ExpressionParser
, now that you have some experience with it. (Obviously completely optional, no pressure or expectations from me.)
from pidgin.
OK, I appreciate that.
I would love to be able to contribute docs, but I am still at the copy/paste-hack-until-it-works level of understanding, but I'll will keep playing with Pidgin and jump back in if I get comfortable enough.
Thanks again!
from pidgin.
Related Issues (20)
- Question: can this URN parser be improved? HOT 2
- Expression handling examples/documentation HOT 1
- Parsing context HOT 4
- Is there a good way to turn Digit.Repeat(n) into a string? HOT 2
- How do I keep both sides of a match? HOT 4
- Question: How to parse a list of strings? HOT 1
- Tried to rewind past the start of the input. Please report this as a bug in Pidgin! HOT 1
- Parsing pseudo freeform text HOT 4
- Matching an exact string HOT 1
- Add support for .net framework in the new versions HOT 1
- Docs website is down HOT 9
- Support trimming and AOT HOT 5
- AOT generic expansion warning HOT 15
- Can you write an example for a Luau parser? HOT 2
- Consider writing tutorials instead of examples HOT 1
- Confusing API: unexpected EOF error HOT 1
- Can't locate documentation: parsing non-char streams. HOT 4
- Maybe Bug: Many/UnsignedInt Operator ignores Whitespace HOT 6
- Need help with `Try` or `Int(10)` HOT 3
Recommend Projects
-
React
A declarative, efficient, and flexible JavaScript library for building user interfaces.
-
Vue.js
🖖 Vue.js is a progressive, incrementally-adoptable JavaScript framework for building UI on the web.
-
Typescript
TypeScript is a superset of JavaScript that compiles to clean JavaScript output.
-
TensorFlow
An Open Source Machine Learning Framework for Everyone
-
Django
The Web framework for perfectionists with deadlines.
-
Laravel
A PHP framework for web artisans
-
D3
Bring data to life with SVG, Canvas and HTML. 📊📈🎉
-
Recommend Topics
-
javascript
JavaScript (JS) is a lightweight interpreted programming language with first-class functions.
-
web
Some thing interesting about web. New door for the world.
-
server
A server is a program made to process requests and deliver data to clients.
-
Machine learning
Machine learning is a way of modeling and interpreting data that allows a piece of software to respond intelligently.
-
Visualization
Some thing interesting about visualization, use data art
-
Game
Some thing interesting about game, make everyone happy.
Recommend Org
-
Facebook
We are working to build community through open source technology. NB: members must have two-factor auth.
-
Microsoft
Open source projects and samples from Microsoft.
-
Google
Google ❤️ Open Source for everyone.
-
Alibaba
Alibaba Open Source for everyone
-
D3
Data-Driven Documents codes.
-
Tencent
China tencent open source team.
from pidgin.