source: main/branches/3D/openPLM/plmapp/query_parser.py @ 662

Revision 662, 3.8 KB checked in by pcosquer, 9 years ago (diff)

3D branch: merge changes from trunk (rev [661])

RevLine 
[460]1import re
2import string
3
4from lepl import *
5
6try:
7    from haystack.query import SQ
8except Exception:
9    SQ = None
10
# ``split(text)`` cuts *text* at every single ASCII punctuation character
# (the characters listed in ``string.punctuation``).
_PUNCTUATION_RE = re.compile("[%s]" % re.escape(string.punctuation))
split = _PUNCTUATION_RE.split
12
class Alternatives(List):
    """Parse-tree node whose children are combined with OR."""

    def to_SQ(self):
        """Return the OR-combination of every child's ``to_SQ()`` result.

        The accumulation starts from an empty ``SQ()``.
        """
        combined = SQ()
        for child in self:
            combined |= child.to_SQ()
        return combined
20
21
class Conjunctives(List):
    """Parse-tree node whose children are combined with AND."""

    def to_SQ(self):
        """Return the AND-combination of every child's ``to_SQ()`` result.

        The accumulation starts from an empty ``SQ()``.
        """
        combined = SQ()
        for child in self:
            combined &= child.to_SQ()
        return combined
28
class Query(Conjunctives):
    """A run of adjacent expressions; converted to SQ like Conjunctives."""
31
[662]32
def convert_number(query, qualifier):
    """Build the SQ filter for one search term.

    When *query* consists only of digits, the result matches the term
    itself or the term left-padded with one to nine zeros, so that "51"
    also matches "0051" (as in "part-0051").

    Otherwise the result is a plain ``SQ(qualifier=query)`` filter.

    :param query: the search term (a string)
    :param qualifier: name of the field to filter on, used as the SQ keyword
    :return: an SQ object
    """
    result = SQ()
    if not query.isdigit():
        result &= SQ(**{qualifier: query})
        return result

    # OR together the zero-padded spellings: 0 to 9 leading zeros
    padded_variants = SQ()
    for zero_count in range(10):
        padded_variants |= SQ(**{qualifier: "0" * zero_count + query})
    result &= padded_variants
    return result
51
class Text(List):
    """A word or phrase of the query, optionally preceded by a qualifier.

    The node holds either one element (the text) or two elements
    (the qualifier followed by the text).
    """

    def to_SQ(self):
        """Convert this word or phrase into an SQ filter.

        Without a qualifier the filter applies to the "content" field.
        The text is stripped and lowercased before use.

        If the text ends with "*", it is treated as a prefix search: the
        trailing stars are removed, the rest is split on punctuation, every
        piece but the last goes through ``convert_number`` and the last
        piece becomes a ``<qualifier>__startswith`` filter; all of these are
        AND-ed together.  Otherwise the whole text goes through
        ``convert_number``.
        """
        if len(self) == 2:
            qualifier, text = self
            # presumably a (label, value) pair produced by the grammar's
            # "> 'qualifier'" rule -- only the value is kept
            qualifier = qualifier[1]
        else:
            qualifier = "content"
            text = self[0]
        text = text.strip().lower()

        if not text.endswith("*"):
            return convert_number(text, qualifier)

        sq = SQ()
        items = split(text.rstrip("*"))
        for item in items[:-1]:
            sq &= convert_number(item, qualifier)
        # the star is put back only for the default "content" field
        suffix = "*" if qualifier == "content" else ""
        sq &= SQ(**{qualifier + "__startswith": items[-1] + suffix})
        return sq
[460]77
class Not(List):
    """Parse-tree node that negates its single child expression."""

    def to_SQ(self):
        """Return the inverse (``~``) of the first child's ``to_SQ()``."""
        operand = self[0]
        return ~operand.to_SQ()
82
def get_query_parser():
    """Build the search-query grammar and return its parsing function.

    The grammar recognises bare words, quoted phrases, optional
    ``name:`` / ``name=`` qualifiers, parentheses and the keywords
    ``NOT``, ``AND`` and ``OR``.  Matched pieces are wrapped in the
    Text, Not, Query, Conjunctives and Alternatives node classes.

    :return: the ``parse_string`` method of the top-level matcher; call it
             with the query string.
    """
    # these three rules refer to each other, so they are declared first
    # and filled in with "+=" below
    expr = Delayed()
    query = Delayed()
    alternatives = Delayed()

    # parentheses and keyword separators
    open_paren = ~Any("(")
    close_paren = ~Any(")")
    and_keyword = Drop('AND')
    or_keyword = Drop('OR')

    # a bare word must not start where a reserved token starts
    reserved = Literals("OR", "AND", "NOT", ")", "(")
    bare_word = ~Lookahead(reserved) & (Word())
    quoted = String()
    term = quoted | bare_word

    # letters followed by ':' or '=' form a qualifier, labelled 'qualifier'
    prefix = (Word(Any(string.ascii_letters)) & Drop(Any(':='))) > 'qualifier'
    leaf = (Optional(prefix) & term) > Text

    # the rules below are deliberately combined inside DroppedSpace()
    with DroppedSpace():
        negation = (~Literals("NOT") & expr) > Not
        expr += (open_paren & alternatives & close_paren) | negation | leaf
        query += (expr[1:] > Query)
        and_group = query[:, and_keyword] > Conjunctives
        alternatives += (and_group[:, or_keyword] > Alternatives)

    # NOTE(review): presumably relaxes the "whole input must match" check
    # of the first parse -- confirm against the LEPL configuration docs
    alternatives.config.no_full_first_match()
    return alternatives.parse_string
107
# names exported by ``from query_parser import *``
__all__ = [
    "get_query_parser",
    "Conjunctives",
    "Alternatives",
    "Query",
    "Text",
]
110
if __name__ == "__main__":
    # Manual smoke test: parse a few well-formed and malformed queries and
    # print each query followed by the first element of the parse result.
    c = get_query_parser()
    for s in ('all of these words "with this phrase" '
                       'OR that OR this site:within.site '
                       'filetype:ps from:lastweek',
                "A:aa",
                "a OR",
                ": dd",
                "NOT data a",
                "(a b c)",
                "( a b c)",
                ",v;d!:;,:;",
                "NOT ( a  b )  OR (abc OR NOT De)",
                "( )))",
                '"dfdl',
                " a AND NOT b OR CC",
                ):
        s = s.strip()
        # single-argument print(...) produces the same output with the
        # Python 2 print statement and the Python 3 print function
        print(s)
        print(c(s)[0])
Note: See TracBrowser for help on using the repository browser.