]> arthur.barton.de Git - netatalk.git/blob - etc/spotlight/slmod_rdf_parser.y
Add filetype searching and searching by dates
[netatalk.git] / etc / spotlight / slmod_rdf_parser.y
1 %{
2   #include <atalk/standards.h>
3
4   #include <stdbool.h>
5   #include <stdio.h>
6   #include <string.h>
7   #include <time.h>
8
9   #include <gio/gio.h>
10
11   #include <atalk/talloc.h>
12   #include <atalk/logger.h>
13   #include <atalk/errchk.h>
14   #include <atalk/spotlight.h>
15
16   #include "slmod_rdf_map.h"
17
18   struct yy_buffer_state;
19   typedef struct yy_buffer_state *YY_BUFFER_STATE;
20   extern int yylex (void);
21   extern void yyerror (char const *);
22   extern void *yyterminate(void);
23   extern YY_BUFFER_STATE yy_scan_string( const char *str);
24   extern void yy_delete_buffer ( YY_BUFFER_STATE buffer );
25
26   /* forward declarations */
27   static const char *map_expr(const char *attr, char op, const char *val);
28   static const char *map_daterange(const char *dateattr, time_t date1, time_t date2);
29   static time_t isodate2unix(const char *s);
30  
31  /* global vars, eg needed by the lexer */
32   slq_t *srp_slq;
33
34   /* local vars */
35   static gchar *srp_result;
36   static gchar *srp_fts;
37 %}
38
39 %code provides {
40   #define SPRAW_TIME_OFFSET 978307200
41   extern int map_spotlight_to_rdf_query(slq_t *slq, gchar **rdf_result, gchar **fts_result);
42   extern slq_t *srp_slq;
43 }
44
45 %union {
46     int ival;
47     const char *sval;
48     bool bval;
49     time_t tval;
50 }
51
52 %expect 4
53 %error-verbose
54
55 %type <sval> match expr line function
56 %type <tval> date
57
58 %token <sval> WORD
59 %token <bval> BOOL
60 %token FUNC_INRANGE
61 %token DATE_ISO
62 %token OBRACE CBRACE EQUAL UNEQUAL GT LT COMMA QUOTE
63 %left AND
64 %left OR
65 %%
66
/* Start symbol: a query is a (possibly empty) sequence of `line` items. */
input:
    /* empty */
  | input line
  ;
71      
/*
 * A complete query. The mapped expression is wrapped in an RDF condition that
 * additionally requires File:Path to start with the volume path of the
 * current query (srp_slq->slq_vol->v_path). The resulting string is allocated
 * in the talloc context srp_slq and published through srp_result.
 */
line:
expr                           {
    srp_result = talloc_asprintf(srp_slq,
                                 "<rdfq:Condition>\n"
                                 "  <rdfq:and>\n"
                                 "    <rdfq:startsWith>\n"
                                 "      <rdfq:Property name=\"File:Path\" />\n"
                                 "      <rdf:String>%s</rdf:String>\n"
                                 "    </rdfq:startsWith>\n"
                                 "    %s\n"
                                 "  </rdfq:and>\n"
                                 "</rdfq:Condition>\n",
                                 srp_slq->slq_vol->v_path, $1);
    /*
     * talloc_asprintf() returns NULL when the allocation fails; fail the
     * parse instead of finishing "successfully" with no result.
     */
    if (srp_result == NULL) {
        yyerror("out of memory");
        YYABORT;
    }
    $$ = srp_result;
}
;
88
/*
 * An expression: a boolean literal, a single comparison, a function call,
 * a parenthesised expression, or two expressions joined by AND / OR.
 * Every alternative except BOOL yields an RDF fragment allocated in the
 * talloc context srp_slq.
 */
expr:
BOOL                             {
    /*
     * A boolean literal produces no RDF fragment: `false` ends the parse
     * successfully right here, `true` aborts it.
     */
    if ($1 == false)
        YYACCEPT;
    else
        YYABORT;
}
| match                        {$$ = $1; if ($$ == NULL) YYABORT;}
  /*
   * map_daterange() returns NULL for an unmapped attribute or on error;
   * reject it like a failed match so NULL never reaches strcmp() or "%s".
   */
| function                     {$$ = $1; if ($$ == NULL) YYABORT;}
| OBRACE expr CBRACE           {$$ = talloc_asprintf(srp_slq, "%s\n", $2);}
| expr AND expr                {$$ = talloc_asprintf(srp_slq, "<rdfq:and>\n%s\n%s\n</rdfq:and>\n", $1, $3);}
| expr OR expr                 {
    /* map_expr() returns "" for an FTS match, so "" marks an FTS operand */
    if (strcmp($1, "") == 0 || strcmp($3, "") == 0) {
        /*
         * The default Spotlight search term issued by the Finder (10.8) is:
         * '* == "searchterm" || kMDItemTextContent == "searchterm"'
         * As it isn't mappable to a single Tracker RDF query, we silently
         * map ANY FTS query expression being part of an OR compound
         * expression to a simple filename search.
         * FTS queries are thus only possible by explicitly requesting
         * file content FTS search in the Finder on the client (resulting
         * in a 'kMDItemTextContent == "searchterm"' query).
         */
        /*
         * The operand text is derived from the client's query, so it must
         * be passed as an argument and never used as the format string.
         */
        if (strcmp($1, "") == 0)
            $$ = talloc_asprintf(srp_slq, "%s", $3);
        else
            $$ = talloc_asprintf(srp_slq, "%s", $1);
        /* drop the FTS term recorded by map_expr() */
        talloc_free(srp_fts);
        srp_fts = NULL;
    } else {
        $$ = talloc_asprintf(srp_slq, "<rdfq:or>\n%s\n%s\n</rdfq:or>\n", $1, $3);
    }
}
;
123
/*
 * A single comparison: attribute WORD, operator, quoted value WORD.
 * The operator is handed to map_expr() as one character: '=', '!', '<', '>'.
 *
 * The last four alternatives accept one extra WORD after the closing quote;
 * its value is not used (presumably Spotlight comparison modifiers -- TODO
 * confirm against the lexer).
 */
match:
  WORD EQUAL QUOTE WORD QUOTE
    { $$ = map_expr($1, '=', $4); }
| WORD UNEQUAL QUOTE WORD QUOTE
    { $$ = map_expr($1, '!', $4); }
| WORD LT QUOTE WORD QUOTE
    { $$ = map_expr($1, '<', $4); }
| WORD GT QUOTE WORD QUOTE
    { $$ = map_expr($1, '>', $4); }
| WORD EQUAL QUOTE WORD QUOTE WORD
    { $$ = map_expr($1, '=', $4); }
| WORD UNEQUAL QUOTE WORD QUOTE WORD
    { $$ = map_expr($1, '!', $4); }
| WORD LT QUOTE WORD QUOTE WORD
    { $$ = map_expr($1, '<', $4); }
| WORD GT QUOTE WORD QUOTE WORD
    { $$ = map_expr($1, '>', $4); }
;
134
/*
 * Date range function: FUNC_INRANGE ( attribute , date , date ).
 * The attribute name and both dates are passed to map_daterange().
 */
function:
  FUNC_INRANGE OBRACE WORD COMMA date COMMA date CBRACE
    { $$ = map_daterange($3, $5, $7); }
;
138
/*
 * A date, as time_t: either DATE_ISO ( WORD ) converted by isodate2unix(),
 * or a bare number of seconds to which SPRAW_TIME_OFFSET is added
 * (978307200, which as a Unix timestamp is 2001-01-01T00:00:00 UTC).
 */
date:
DATE_ISO OBRACE WORD CBRACE    {
    $$ = isodate2unix($3);
    /* isodate2unix() returns (time_t)-1 when the string cannot be parsed */
    if ($$ == (time_t)-1) {
        yyerror("invalid ISO date");
        YYABORT;
    }
}
  /* convert to time_t before adding so the sum is not computed in int */
| WORD                         {$$ = (time_t)atoi($1) + SPRAW_TIME_OFFSET;}
;
143
144 %%
145
/**
 * Convert a timestamp string of the form "YYYY-MM-DDTHH:MM:SSZ" to a time_t.
 *
 * @param[in] s  string to parse
 * @return       mktime() of the parsed fields, or (time_t)-1 if s does not
 *               match the format
 *
 * NOTE(review): mktime() interprets the fields as local time although the
 * format ends in a literal "Z". map_daterange() formats with localtime(), so
 * the two agree with each other -- confirm the intended time zone handling.
 */
static time_t isodate2unix(const char *s)
{
    struct tm tm;

    /*
     * strptime() only stores the fields named in the format; clear the
     * structure first so mktime() never reads indeterminate members.
     */
    memset(&tm, 0, sizeof(tm));

    if (strptime(s, "%Y-%m-%dT%H:%M:%SZ", &tm) == NULL)
        return (time_t)-1;

    /* negative tm_isdst: let mktime() work out whether DST applies */
    tm.tm_isdst = -1;
    return mktime(&tm);
}
154
155 static const char *map_daterange(const char *dateattr, time_t date1, time_t date2)
156 {
157     EC_INIT;
158     char *result = NULL;
159     struct spotlight_rdf_map *p;
160     struct tm *tmp;
161     char buf1[64], buf2[64];
162
163     EC_NULL_LOG( tmp = localtime(&date1) );
164     strftime(buf1, sizeof(buf1), "%Y-%m-%dT%H:%M:%SZ", tmp);
165     EC_NULL_LOG( tmp = localtime(&date2) );
166     strftime(buf2, sizeof(buf2), "%Y-%m-%dT%H:%M:%SZ", tmp);
167
168     for (p = spotlight_rdf_map; p->srm_spotlight_attr; p++) {
169         if (strcmp(dateattr, p->srm_spotlight_attr) == 0) {
170                 result = talloc_asprintf(srp_slq,
171                                          "<rdfq:and>\n"
172                                          "  <rdfq:greaterThan>\n"
173                                          "    <rdfq:Property name=\"%s\" />\n"
174                                          "    <rdf:Date>%s</rdf:Date>\n"
175                                          "  </rdfq:greaterThan>\n"
176                                          "  <rdfq:lessThan>\n"
177                                          "    <rdfq:Property name=\"%s\" />\n"
178                                          "    <rdf:Date>%s</rdf:Date>\n"
179                                          "  </rdfq:lessThan>\n"
180                                          "</rdfq:and>\n",
181                                          p->srm_rdf_attr, buf1,
182                                          p->srm_rdf_attr, buf2);
183             break;
184         }
185     }
186
187 EC_CLEANUP:
188     if (ret != 0)
189         return NULL;
190     return result;
191 }
192
193 static char *map_type_search(const char *attr, char op, const char *val)
194 {
195     char *result = NULL;
196
197     for (struct MDTypeMap *p = MDTypeMap; p->mdtm_value; p++) {
198         if (strcmp(p->mdtm_value, val) == 0) {
199             if (!p->mdtm_type)
200                 return NULL;
201             result = talloc_asprintf(srp_slq,
202                                      "<rdfq:%s>\n"
203                                      "  <rdfq:Property name=\"File:Mime\" />\n"
204                                      "  <rdf:String>%s</rdf:String>\n"
205                                      "</rdfq:%s>\n",
206                                      p->mdtm_rdfop,
207                                      p->mdtm_type,
208                                      p->mdtm_rdfop);
209             break;
210         }
211     }
212     return result;
213 }
214
215 static const char *map_expr(const char *attr, char op, const char *val)
216 {
217     EC_INIT;
218     char *result = NULL;
219     struct spotlight_rdf_map *p;
220     time_t t;
221     struct tm *tmp;
222     char buf1[64];
223     bstring q = NULL, search = NULL, replace = NULL;
224     char *rdfop;
225
226     for (p = spotlight_rdf_map; p->srm_spotlight_attr; p++) {
227         if (p->srm_rdf_attr && strcmp(p->srm_spotlight_attr, attr) == 0) {
228             switch (p->srm_type) {
229             case srmt_num:
230                 q = bformat("^%s$", val);
231                 search = bfromcstr("*");
232                 replace = bfromcstr(".*");
233                 bfindreplace(q, search, replace, 0);
234                 result = talloc_asprintf(srp_slq,
235                                          "<rdfq:regex>\n"
236                                          "  <rdfq:Property name=\"%s\" />\n"
237                                          "  <rdf:String>%s</rdf:String>\n"
238                                          "</rdfq:regex>\n",
239                                          p->srm_rdf_attr,
240                                          bdata(q));
241                 bdestroy(q);
242                 break;
243
244             case srmt_str:
245                 q = bformat("^%s$", val);
246                 search = bfromcstr("*");
247                 replace = bfromcstr(".*");
248                 bfindreplace(q, search, replace, 0);
249                 result = talloc_asprintf(srp_slq,
250                                          "<rdfq:regex>\n"
251                                          "  <rdfq:Property name=\"%s\" />\n"
252                                          "  <rdf:String>%s</rdf:String>\n"
253                                          "</rdfq:regex>\n",
254                                          p->srm_rdf_attr,
255                                          bdata(q));
256                 bdestroy(q);
257                 break;
258
259             case srmt_fts:
260                 if (srp_fts) {
261                     yyerror("only single fts query allowed");
262                     EC_FAIL;
263                 }
264                 q = bfromcstr(val);
265                 search = bfromcstr("*");
266                 replace = bfromcstr("");
267                 bfindreplace(q, search, replace, 0);
268                 srp_fts = talloc_strdup(srp_slq, bdata(q));
269                 result = "";
270                 break;
271
272             case srmt_date:
273                 t = atoi(val) + SPRAW_TIME_OFFSET;
274                 EC_NULL( tmp = localtime(&t) );
275                 strftime(buf1, sizeof(buf1), "%Y-%m-%dT%H:%M:%SZ", tmp);
276
277                 switch (op) {
278                 case '=':
279                     rdfop = "equals";
280                 case '<':
281                     rdfop = "lessThan";
282                 case '>':
283                     rdfop = "greaterThan";
284                 default:
285                     yyerror("unknown date comparison");
286                     EC_FAIL;
287                 }
288                 result = talloc_asprintf(srp_slq,
289                                          "<rdfq:%s>\n"
290                                          "  <rdfq:Property name=\"%s\" />\n"
291                                          "  <rdf:Date>%s</rdf:Date>\n"
292                                          "</rdfq:%s>\n",
293                                          rdfop,
294                                          p->srm_rdf_attr,
295                                          buf1,
296                                          rdfop);
297
298                 break;
299
300             case srmt_type:
301                 result = map_type_search(attr, op, val);
302                 break;
303
304             default:
305                 yyerror("unknown Spotlight attribute type");
306                 EC_FAIL;
307             }
308             break;
309         }
310     }
311
312 EC_CLEANUP:
313     if (q)
314         bdestroy(q);
315     if (search)
316         bdestroy(search);
317     if (replace)
318         bdestroy(replace);
319     return result;
320 }
321
322 void yyerror(const char *str)
323 {
324 #ifdef MAIN
325     printf("yyerror: %s\n", str);
326 #else
327     LOG(log_error, logtype_sl, "yyerror: %s", str);
328 #endif
329 }
330  
/**
 * End-of-input hook for the lexer.
 *
 * @return always 1, which tells the scanner that no further input follows
 */
int yywrap(void)
{
    return 1;
}
335
336 /**
337  * Map a Spotlight RAW query string to a RDF query
338  *
339  * @param[in]     slq            Spotlight query handle
340  * @param[out]    sparql_result  Mapped RDF query, string is allocated in
341  *                               talloc context of slq
342  * @return        0 on success, -1 on error
343  **/
344 int map_spotlight_to_rdf_query(slq_t *slq, gchar **rdf_result, gchar **fts_result)
345 {
346     EC_INIT;
347     YY_BUFFER_STATE s = NULL;
348     srp_result = NULL;
349     srp_fts = NULL;
350
351     srp_slq = slq;
352     s = yy_scan_string(slq->slq_qstring);
353
354     EC_ZERO( yyparse() );
355
356 EC_CLEANUP:
357     if (s)
358         yy_delete_buffer(s);
359     if (ret == 0) {
360         *rdf_result = srp_result;
361         *fts_result = srp_fts;
362     } else {
363         *rdf_result = NULL;
364         *fts_result = NULL;
365     }
366     EC_EXIT;
367 }
368
#ifdef MAIN
/**
 * Stand-alone test driver, only built when MAIN is defined.
 *
 * Parses the query given as the single command line argument against a
 * dummy volume rooted at "/Volumes/test" and, if parsing succeeds, prints
 * the resulting RDF query and FTS term.
 *
 * @return 1 on wrong usage, 0 otherwise
 */
int main(int argc, char **argv)
{
    int ret;
    YY_BUFFER_STATE s;

    if (argc != 2) {
        printf("usage: %s QUERY\n", argv[0]);
        return 1;
    }

    /* minimal query handle: the grammar only reads slq_vol->v_path */
    srp_slq = talloc_zero(NULL, slq_t);
    struct vol *vol = talloc_zero(srp_slq, struct vol);
    vol->v_path = "/Volumes/test";
    srp_slq->slq_vol = vol;

    s = yy_scan_string(argv[1]);

    ret = yyparse();

    yy_delete_buffer(s);

    if (ret == 0)
        printf("RDF:\n%s\nFTS: %s\n",
               srp_result ? srp_result : "(empty)",
               srp_fts ? srp_fts : "(none)");
    return 0;
}
#endif