source: main/branches/3D/openPLM/plmapp/query_parser.py @ 817

Revision 817, 3.9 KB checked in by pcosquer, 9 years ago (diff)

merge changes from trunk [814]
tests fails...

RevLine 
[460]1import re
2import string
3
4from lepl import *
5
6try:
7    from haystack.query import SQ
8except Exception:
9    SQ = None
10
# Split a string on runs of one or more ASCII punctuation characters
# (``string.punctuation``).  An empty string is returned for a leading or
# trailing run, e.g. split("ab-") == ["ab", ""].
split = re.compile("[%s]+" % re.escape(string.punctuation)).split
[460]12
class Alternatives(List):
    """List node whose elements are combined with OR."""

    def to_SQ(self):
        """Return an ``SQ`` made by OR-ing ``to_SQ()`` of every element.

        Starts from an empty ``SQ()``, which is returned unchanged when the
        node has no elements.
        """
        sq = SQ()
        for elem in self:
            sq |= elem.to_SQ()
        return sq
20
21
class Conjunctives(List):
    """List node whose elements are combined with AND."""

    def to_SQ(self):
        """Return an ``SQ`` made by AND-ing ``to_SQ()`` of every element.

        Starts from an empty ``SQ()``, which is returned unchanged when the
        node has no elements.
        """
        sq = SQ()
        for elem in self:
            sq &= elem.to_SQ()
        return sq
28
class Query(Conjunctives):
    """A sequence of expressions; converted to ``SQ`` exactly like
    :class:`Conjunctives` (all elements AND-ed together).
    """
31
[662]32
def convert_number(query, qualifier):
    """Build the ``SQ`` filter that looks up *query* in the field *qualifier*.

    If *query* consists only of digits, the filter is an OR of the number
    prefixed with 0 to 9 zeros (``51``, ``051``, ``0051``, ...) so that
    "51" matches "part-0051".

    If *query* does not represent a number, the filter is a simple
    ``SQ(qualifier -> query)`` lookup.

    :param query: the search term (a string)
    :param qualifier: field name, used as the keyword passed to ``SQ``
    :return: the filter, AND-ed onto an initially empty ``SQ()``
    """
    sq = SQ()
    if query.isdigit():
        or_ = SQ()
        # the number itself first, then with 1 to 9 leading zeros
        for zeros in range(10):
            or_ |= SQ(**{qualifier: "0" * zeros + query})
        sq &= or_
    else:
        sq &= SQ(**{qualifier: query})
    return sq
51
class Text(List):
    """List node for a single search term.

    Holds either ``[text]`` or ``[qualifier, text]``.
    """

    def to_SQ(self):
        """Convert the term into an ``SQ`` filter.

        * The text is stripped and lower-cased.
        * Without a qualifier, the field ``content`` is searched; a bare
          ``*`` is always searched in ``content`` whatever the qualifier.
        * A text ending with ``*`` is a prefix search: the trailing stars
          are removed, the rest is split on punctuation, every piece but
          the last goes through :func:`convert_number` and the last piece
          becomes a ``<qualifier>__startswith`` lookup (with a ``*``
          appended for the ``text`` and ``content`` fields).
        * Any other text is handed to :func:`convert_number` as a whole.
        """
        if len(self) == 2:
            qualifier, text = self
            # the qualifier element is indexed to get the field name
            # (presumably a ('qualifier', name) pair built by the parser)
            qualifier = qualifier[1]
        else:
            qualifier = "content"
            text = self[0]
        text = text.strip().lower()
        sq = SQ()
        if text == "*":
            qualifier = "content"
        if text.endswith("*"):
            text = text.rstrip("*")
            items = split(text)
            for item in items[:-1]:
                sq &= convert_number(item, qualifier)
            suffix = "*" if qualifier in ("text", "content") else ""
            sq &= SQ(**{qualifier + "__startswith": items[-1] + suffix})
        else:
            sq = convert_number(text, qualifier)
        return sq
[460]79
class Not(List):
    """List node negating the expression stored as its first element."""

    def to_SQ(self):
        """Return the inverse (``~``) of the first element's ``to_SQ()``."""
        return ~ self[0].to_SQ()
84
def get_query_parser():
    """Build the LEPL grammar for search queries and return its parse function.

    Grammar, from the lowest to the highest level:

    * a term is a word or a phrase (LEPL ``String()``), optionally
      preceded by a ``name:`` or ``name=`` qualifier -> :class:`Text`
    * an expression is a term, ``NOT expression`` (-> :class:`Not`) or
      alternatives enclosed in parentheses
    * a query is one or more expressions -> :class:`Query`
    * conjunctives are queries separated by ``AND`` -> :class:`Conjunctives`
    * alternatives are conjunctives separated by ``OR``
      -> :class:`Alternatives`

    :return: the ``parse_string`` method of the top-level (alternatives)
             matcher; it is called with the query string
    """
    expr = Delayed()
    query = Delayed()
    alternatives = Delayed()
    # reserved tokens: a plain word must not start with one of them
    operators = Literals("OR", "AND", "NOT", ")", "(")
    # ASCII letters followed by ':' or '='; the separator is dropped and
    # the letters are labelled 'qualifier'
    qualifier = Word(Any(string.ascii_letters)) & Drop(Any(':='))  > 'qualifier'
    word = ~Lookahead(operators) & (Word())
    phrase = String()
    text = phrase | word
    word_or_phrase = (Optional(qualifier) & text) > Text
    # parentheses and the AND/OR keywords only shape the tree: they are
    # matched, then discarded from the results
    par_op = ~Any("(")
    par_cl = ~Any(")")
    separator_and = Drop('AND')
    separator_or = Drop('OR')
    # inside this block, spaces between the combined matchers are dropped
    with DroppedSpace():
        not_expr = ~Literals("NOT") & expr > Not
        expr += (par_op & alternatives & par_cl) | not_expr | word_or_phrase
        query += expr[1:] > Query
        conjunctives = query[:, separator_and] > Conjunctives
        alternatives += conjunctives[:, separator_or] > Alternatives

    parser = alternatives
    # turn off LEPL's "full first match" check on the parser configuration
    parser.config.no_full_first_match()
    return parser.parse_string
109
# Names exported by ``from query_parser import *``.
__all__ = ["get_query_parser", "Conjunctives", "Alternatives", "Query",
           "Text"]
112
if __name__ == "__main__":
    # Manual smoke test: parse a handful of sample queries (several of them
    # deliberately malformed) and print each query followed by the first
    # element of its parse result.
    c = get_query_parser()
    for s in ('all of these words "with this phrase" '
                       'OR that OR this site:within.site '
                       'filetype:ps from:lastweek',
                "A:aa",
                "a OR",
                ": dd",
                "NOT data a",
                "(a b c)",
                "( a b c)",
                ",v;d!:;,:;",
                "NOT ( a  b )  OR (abc OR NOT De)",
                "( )))",
                '"dfdl',
                " a AND NOT b OR CC",
                ):
        s = s.strip()
        # single-argument print(...) behaves the same on Python 2 and 3
        print(s)
        print(c(s)[0])
Note: See TracBrowser for help on using the repository browser.