1 | /** |
---|
2 | * Author : Gérald FENOY |
---|
3 | * |
---|
4 | * Copyright 2017 GeoLabs SARL. All rights reserved. |
---|
5 | * |
---|
6 | * This work was supported by public funds received in the framework of GEOSUD, |
---|
7 | * a project (ANR-10-EQPX-20) of the program "Investissements d'Avenir" managed |
---|
8 | * by the French National Research Agency |
---|
9 | * |
---|
10 | * Permission is hereby granted, free of charge, to any person obtaining a copy |
---|
11 | * of this software and associated documentation files (the "Software"), to deal |
---|
12 | * in the Software without restriction, including without limitation the rights |
---|
13 | * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
---|
14 | * copies of the Software, and to permit persons to whom the Software is |
---|
15 | * furnished to do so, subject to the following conditions: |
---|
16 | * |
---|
17 | * The above copyright notice and this permission notice shall be included in |
---|
18 | * all copies or substantial portions of the Software. |
---|
19 | * |
---|
20 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
---|
21 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
---|
22 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
---|
23 | * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
---|
24 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
---|
25 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN |
---|
26 | * THE SOFTWARE. |
---|
27 | */ |
---|
28 | |
---|
29 | |
---|
30 | #include "service.h" |
---|
31 | #include "service_internal.h" |
---|
32 | #include "sshapi.h" |
---|
33 | #include "server_internal.h" |
---|
34 | |
---|
35 | #include <sys/socket.h> |
---|
36 | #include <sys/un.h> |
---|
37 | |
---|
38 | #include <libxml/tree.h> |
---|
39 | #include <libxml/parser.h> |
---|
40 | #include <libxml/xpath.h> |
---|
41 | #include <libxml/xpathInternals.h> |
---|
42 | |
---|
43 | #include <libxslt/xslt.h> |
---|
44 | #include <libxslt/xsltInternals.h> |
---|
45 | #include <libxslt/transform.h> |
---|
46 | #include <libxslt/xsltutils.h> |
---|
47 | |
---|
48 | #include <dirent.h> |
---|
49 | extern "C" { |
---|
50 | |
---|
51 | /** |
---|
52 | * FinalizeHPC ZOO Service : |
---|
53 | * This service is used to inform a ZOO-Kernel waiting for the end of the |
---|
54 | * execution of a HPC service |
---|
55 | */ |
---|
56 | ZOO_DLL_EXPORT int FinalizeHPC(maps*& conf,maps*& inputs,maps*& outputs){ |
---|
57 | // Retrieve the jobid corresponding to the identifier generated by SLURM |
---|
58 | // by reading the file generated when running the SBATCH file |
---|
59 | map* jobid=getMapFromMaps(inputs,"jobid","value"); |
---|
60 | struct sockaddr_un addr; |
---|
61 | char buf[100]="3"; |
---|
62 | int fd,rc=NULL; |
---|
63 | int i=0; |
---|
64 | map* usid=getMapFromMaps(conf,"lenv","usid"); |
---|
65 | map* tmpPath=getMapFromMaps(conf,"main","tmpPath"); |
---|
66 | |
---|
67 | char *flenv = |
---|
68 | (char *) malloc ((strlen (tmpPath->value) + |
---|
69 | strlen (jobid->value) + 12) * sizeof (char)); |
---|
70 | sprintf (flenv, "%s/%s_lenv.cfg", tmpPath->value, jobid->value); |
---|
71 | maps* m = (maps *) malloc (MAPS_SIZE); |
---|
72 | m->child=NULL; |
---|
73 | m->next=NULL; |
---|
74 | map* configId=NULL; |
---|
75 | |
---|
76 | |
---|
77 | if(conf_read(flenv, m) != 2){ |
---|
78 | configId=getMapFromMaps(m,"lenv","configId"); |
---|
79 | setMapInMaps(conf,"lenv","configId",configId->value); |
---|
80 | }else{ |
---|
81 | setMapInMaps(conf,"lenv","message",_("Unable to read the lenv section file of the requested jobid")); |
---|
82 | return SERVICE_FAILED; |
---|
83 | } |
---|
84 | |
---|
85 | SSHCON *test=ssh_connect(conf); |
---|
86 | /*if(test==NULL){ |
---|
87 | setMapInMaps(conf,"lenv","message",_("Unable to connect using ssh.")); |
---|
88 | return SERVICE_FAILED; |
---|
89 | }*/ |
---|
90 | |
---|
91 | char *logPath=(char*)malloc((strlen(tmpPath->value)+strlen(jobid->value)+12)*sizeof(char)); |
---|
92 | sprintf(logPath,"%s/exec_out_%s",tmpPath->value,jobid->value); |
---|
93 | struct stat f_status; |
---|
94 | int ts=stat(logPath, &f_status); |
---|
95 | char* fcontent = NULL; |
---|
96 | if(ts==0) { |
---|
97 | fcontent=(char*)malloc(sizeof(char)*(f_status.st_size+1)); |
---|
98 | FILE* f=fopen(logPath,"rb"); |
---|
99 | fread(fcontent,f_status.st_size,1,f); |
---|
100 | int fsize=f_status.st_size; |
---|
101 | fcontent[fsize]=0; |
---|
102 | fclose(f); |
---|
103 | }else{ |
---|
104 | setMapInMaps(conf,"lenv","message",_("No service with this jobid can be found")); |
---|
105 | return SERVICE_FAILED; |
---|
106 | } |
---|
107 | free(logPath); |
---|
108 | // Run scontrol to check if the service execution ended. |
---|
109 | // Store all the informations returned by scontrol command as a cfg file to |
---|
110 | // be parsed back by the ZOO-Kernel waiting for the execution of the remote |
---|
111 | // service |
---|
112 | maps* tmpMaps=createMaps("henv"); |
---|
113 | char* command=(char*)malloc((126)*sizeof(char)); |
---|
114 | sprintf(command,"scontrol show jobid | grep -A24 JobId=%s",fcontent); |
---|
115 | if(ssh_exec(conf,command,ssh_get_cnt(conf))==0){ |
---|
116 | free(command); |
---|
117 | setMapInMaps(conf,"lenv","message",_("Failed to run scontrol remotely")); |
---|
118 | // TODO: check status in db and if available continue in other case return SERVICE_FAILED |
---|
119 | return SERVICE_FAILED; |
---|
120 | }else{ |
---|
121 | free(command); |
---|
122 | logPath=(char*)malloc((strlen(tmpPath->value)+strlen(usid->value)+11)*sizeof(char)); |
---|
123 | sprintf(logPath,"%s/exec_out_%s",tmpPath->value,usid->value); |
---|
124 | int ts=stat(logPath, &f_status); |
---|
125 | if(ts==0) { |
---|
126 | fcontent=(char*)malloc(sizeof(char)*(f_status.st_size+1)); |
---|
127 | FILE* f=fopen(logPath,"rb"); |
---|
128 | fread(fcontent,f_status.st_size,1,f); |
---|
129 | int fsize=f_status.st_size; |
---|
130 | fcontent[fsize]=0; |
---|
131 | fclose(f); |
---|
132 | free(logPath); |
---|
133 | char *token, *saveptr; |
---|
134 | token = strtok_r (fcontent, " ", &saveptr); |
---|
135 | while (token != NULL) |
---|
136 | { |
---|
137 | char *token1, *saveptr1; |
---|
138 | char *tmpToken=strdup(token); |
---|
139 | token1 = strtok_r (tmpToken, "=", &saveptr1); |
---|
140 | int isNext=-1; |
---|
141 | int hasTwoElements=0; |
---|
142 | char *name=NULL; |
---|
143 | while (token1 != NULL) |
---|
144 | { |
---|
145 | if(hasTwoElements==0) |
---|
146 | name=strdup(token1); |
---|
147 | if(hasTwoElements<1) |
---|
148 | hasTwoElements+=1; |
---|
149 | else{ |
---|
150 | char *value=strdup(token1); |
---|
151 | if(value[strlen(value)-1]=='\n') |
---|
152 | value[strlen(value)-1]=0; |
---|
153 | if(strlen(name)>0 && strlen(value)>0){ |
---|
154 | if(tmpMaps->content==NULL) |
---|
155 | tmpMaps->content=createMap(name,value); |
---|
156 | else |
---|
157 | addToMap(tmpMaps->content,name,value); |
---|
158 | free(value); |
---|
159 | } |
---|
160 | free(name); |
---|
161 | hasTwoElements=0; |
---|
162 | } |
---|
163 | token1 = strtok_r (NULL, "=", &saveptr1); |
---|
164 | } |
---|
165 | free(tmpToken); |
---|
166 | token = strtok_r (NULL, " ", &saveptr); |
---|
167 | } |
---|
168 | }else{ |
---|
169 | setMapInMaps(conf,"lenv","message",_("Unable to access the downloaded execution log file")); |
---|
170 | return SERVICE_FAILED; |
---|
171 | } |
---|
172 | } |
---|
173 | logPath=(char*)malloc((strlen(tmpPath->value)+strlen(jobid->value)+15)*sizeof(char)); |
---|
174 | sprintf(logPath,"%s/exec_status_%s",tmpPath->value,jobid->value); |
---|
175 | dumpMapsToFile(tmpMaps,logPath,0); |
---|
176 | char *sname=(char*)malloc((strlen(tmpPath->value)+strlen(jobid->value)+21)); |
---|
177 | sprintf(sname,"%s/.wait_socket_%s.sock",tmpPath->value,jobid->value); |
---|
178 | if ( (fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { |
---|
179 | perror("socket error"); |
---|
180 | setMapInMaps(conf,"lenv","message",_("Socket error")); |
---|
181 | return SERVICE_FAILED; |
---|
182 | } |
---|
183 | memset(&addr, 0, sizeof(addr)); |
---|
184 | addr.sun_family = AF_UNIX; |
---|
185 | strncpy(addr.sun_path, sname, sizeof(addr.sun_path)-1); |
---|
186 | if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == -1) { |
---|
187 | perror("connect error"); |
---|
188 | setMapInMaps(conf,"lenv","message",_("Unable to connect")); |
---|
189 | return SERVICE_FAILED; |
---|
190 | } |
---|
191 | if (write(fd, "3", 1) != rc) { |
---|
192 | if (rc < 0) { |
---|
193 | perror("write error"); |
---|
194 | setMapInMaps(conf,"lenv","message",_("Unable to announce the successful execution of the HPC service")); |
---|
195 | close(fd); |
---|
196 | return SERVICE_FAILED; |
---|
197 | } |
---|
198 | } |
---|
199 | close(fd); |
---|
200 | setOutputValue(outputs,"Result",(char*)"\"FinalizeHPC run successfully\"",32); |
---|
201 | unlink(flenv); |
---|
202 | free(flenv); |
---|
203 | |
---|
204 | return SERVICE_SUCCEEDED; |
---|
205 | } |
---|
206 | |
---|
207 | |
---|
208 | /** |
---|
209 | * FinalizeHPC1 ZOO Service : |
---|
210 | * This service is used to inform a ZOO-Kernel waiting for the end of the |
---|
211 | * execution of a HPC service |
---|
212 | * |
---|
213 | * format="AllocCPUS"; for i in $(sacct -e) ; do format="$format,$i"; done; format="$(echo $format | sed "s:AllocCPUS,::")" ; echo $format; sacct --format=$format -p | grep "997f-11e8-9f78-0050569320d2" |
---|
214 | * |
---|
215 | * AllocCPUS,AllocGRES,AllocNodes,AllocTRES,Account,AssocID,AveCPU,AveCPUFreq,AveDiskRead,AveDiskWrite,AvePages,AveRSS,AveVMSize,BlockID,Cluster,Comment,ConsumedEnergy,ConsumedEnergyRaw,CPUTime,CPUTimeRAW,DerivedExitCode,Elapsed,Eligible,End,ExitCode,GID,Group,JobID,JobIDRaw,JobName,Layout,MaxDiskRead,MaxDiskReadNode,MaxDiskReadTask,MaxDiskWrite,MaxDiskWriteNode,MaxDiskWriteTask,MaxPages,MaxPagesNode,MaxPagesTask,MaxRSS,MaxRSSNode,MaxRSSTask,MaxVMSize,MaxVMSizeNode,MaxVMSizeTask,MinCPU,MinCPUNode,MinCPUTask,NCPUS,NNodes,NodeList,NTasks,Priority,Partition,QOS,QOSRAW,ReqCPUFreq,ReqCPUFreqMin,ReqCPUFreqMax,ReqCPUFreqGov,ReqCPUS,ReqGRES,ReqMem,ReqNodes,ReqTRES,Reservation,ReservationId,Reserved,ResvCPU,ResvCPURAW,Start,State,Submit,Suspended,SystemCPU,Timelimit,TotalCPU,UID,User,UserCPU,WCKey,WCKeyID |
---|
216 | * 28||1|cpu=28,node=1|geosud|258|||||||||cluster||||00:00:56|56|0:0|00:00:02|2018-08-06T15:48:13|2018-08-06T15:48:16|0:0|1019|geosud|883299|883299|ZOO-Project_5bd1c32b-997f-11e8-9f78-0050569320d2_GSDBandMath_6_2_005||||||||||||||||||||28|1|muse044||4294360886|defq|qos_geosud|20|Unknown|Unknown|Unknown|Unknown|1||0n|1|cpu=1,node=1|||00:00:01|00:00:01|1|2018-08-06T15:48:14|COMPLETED|2018-08-06T15:48:13|00:00:00||UNLIMITED|00:00:00|1229|geosudwps|||0| |
---|
217 | * |
---|
218 | */ |
---|
219 | ZOO_DLL_EXPORT int FinalizeHPC1(maps*& conf,maps*& inputs,maps*& outputs){ |
---|
220 | // Retrieve the jobid corresponding to the identifier generated by SLURM |
---|
221 | // by reading the file generated when running the SBATCH file |
---|
222 | map* jobid=getMapFromMaps(inputs,"jobid","value"); |
---|
223 | struct sockaddr_un addr; |
---|
224 | char buf[100]="3"; |
---|
225 | int fd,rc=NULL; |
---|
226 | int i=0; |
---|
227 | map* usid=getMapFromMaps(conf,"lenv","usid"); |
---|
228 | map* tmpPath=getMapFromMaps(conf,"main","tmpPath"); |
---|
229 | |
---|
230 | char *flenv = |
---|
231 | (char *) malloc ((strlen (tmpPath->value) + |
---|
232 | strlen (jobid->value) + 12) * sizeof (char)); |
---|
233 | sprintf (flenv, "%s/%s_lenv.cfg", tmpPath->value, jobid->value); |
---|
234 | maps* m = (maps *) malloc (MAPS_SIZE); |
---|
235 | m->child=NULL; |
---|
236 | m->next=NULL; |
---|
237 | map* configId=NULL; |
---|
238 | |
---|
239 | |
---|
240 | if(conf_read(flenv, m) != 2){ |
---|
241 | configId=getMapFromMaps(m,"lenv","configId"); |
---|
242 | setMapInMaps(conf,"lenv","configId",configId->value); |
---|
243 | }else{ |
---|
244 | setMapInMaps(conf,"lenv","message",_("Unable to read the lenv section file of the requested jobid")); |
---|
245 | return SERVICE_FAILED; |
---|
246 | } |
---|
247 | |
---|
248 | SSHCON *test=ssh_connect(conf); |
---|
249 | /*if(test==NULL){ |
---|
250 | setMapInMaps(conf,"lenv","message",_("Unable to connect using ssh.")); |
---|
251 | return SERVICE_FAILED; |
---|
252 | }*/ |
---|
253 | |
---|
254 | char *logPath=(char*)malloc((strlen(tmpPath->value)+strlen(jobid->value)+12)*sizeof(char)); |
---|
255 | sprintf(logPath,"%s/exec_out_%s",tmpPath->value,jobid->value); |
---|
256 | struct stat f_status; |
---|
257 | int ts=stat(logPath, &f_status); |
---|
258 | char* fcontent = NULL; |
---|
259 | if(ts==0) { |
---|
260 | fcontent=(char*)malloc(sizeof(char)*(f_status.st_size+1)); |
---|
261 | FILE* f=fopen(logPath,"rb"); |
---|
262 | fread(fcontent,f_status.st_size,1,f); |
---|
263 | int fsize=f_status.st_size; |
---|
264 | fcontent[fsize]=0; |
---|
265 | fclose(f); |
---|
266 | }else{ |
---|
267 | setMapInMaps(conf,"lenv","message",_("No service with this jobid can be found")); |
---|
268 | return SERVICE_FAILED; |
---|
269 | } |
---|
270 | free(logPath); |
---|
271 | // Run sacct to check if the service execution ended. |
---|
272 | // Store all the informations returned by scontrol command as a cfg file to |
---|
273 | // be parsed back by the ZOO-Kernel waiting for the execution of the remote |
---|
274 | // service |
---|
275 | maps* tmpMaps=createMaps("henv"); |
---|
276 | |
---|
277 | map* tmpMap=getMapFromMaps(conf,configId->value,"remote_command_opt"); |
---|
278 | char* command=(char*)malloc((126+strlen(tmpMap->value))*sizeof(char)); |
---|
279 | sprintf(command,"sacct --format=%s -p | grep \"%s\" | sed \"s:||:|None|:g;s:||:|None|:g\"",tmpMap->value,jobid->value); |
---|
280 | if(ssh_exec(conf,command,ssh_get_cnt(conf))==0){ |
---|
281 | free(command); |
---|
282 | setMapInMaps(conf,"lenv","message",_("Failed to run sacct remotely")); |
---|
283 | // TODO: check status in db and if available continue in other case return SERVICE_FAILED |
---|
284 | return SERVICE_FAILED; |
---|
285 | }else{ |
---|
286 | free(command); |
---|
287 | logPath=(char*)malloc((strlen(tmpPath->value)+strlen(usid->value)+11)*sizeof(char)); |
---|
288 | sprintf(logPath,"%s/exec_out_%s",tmpPath->value,usid->value); |
---|
289 | int ts=stat(logPath, &f_status); |
---|
290 | if(ts==0) { |
---|
291 | fcontent=(char*)malloc(sizeof(char)*(f_status.st_size+1)); |
---|
292 | FILE* f=fopen(logPath,"rb"); |
---|
293 | fread(fcontent,f_status.st_size,1,f); |
---|
294 | int fsize=f_status.st_size; |
---|
295 | fcontent[fsize]=0; |
---|
296 | fclose(f); |
---|
297 | free(logPath); |
---|
298 | char *token, *saveptr; |
---|
299 | char *token1, *saveptr1; |
---|
300 | token = strtok_r (tmpMap->value, ",", &saveptr); |
---|
301 | token1 = strtok_r (fcontent, "|", &saveptr1); |
---|
302 | while (token != NULL) { |
---|
303 | fprintf(stderr,"%s %d %s \n",__FILE__,__LINE__,token); |
---|
304 | fflush(stderr); |
---|
305 | fprintf(stderr,"%s %d %s %s \n",__FILE__,__LINE__,token,token1); |
---|
306 | fflush(stderr); |
---|
307 | if(token1 != NULL){ |
---|
308 | if(tmpMaps->content==NULL) |
---|
309 | tmpMaps->content=createMap(token,token1); |
---|
310 | else |
---|
311 | addToMap(tmpMaps->content,token,token1); |
---|
312 | } |
---|
313 | token = strtok_r (NULL, ",", &saveptr); |
---|
314 | token1 = strtok_r (NULL, "|", &saveptr1); |
---|
315 | } |
---|
316 | }else{ |
---|
317 | free(logPath); |
---|
318 | setMapInMaps(conf,"lenv","message",_("Unable to access the downloaded execution log file")); |
---|
319 | return SERVICE_FAILED; |
---|
320 | } |
---|
321 | } |
---|
322 | tmpMap=getMapFromMaps(tmpMaps,"henv","JobId"); |
---|
323 | if(tmpMap!=NULL){ |
---|
324 | char* tmpStr=(char*)malloc((32)*sizeof(char)); |
---|
325 | sprintf(tmpStr,"slurm-%s.out",tmpMap->value); |
---|
326 | addToMap(tmpMaps->content,"StdErr",tmpStr); |
---|
327 | free(tmpStr); |
---|
328 | } |
---|
329 | logPath=(char*)malloc((strlen(tmpPath->value)+strlen(jobid->value)+15)*sizeof(char)); |
---|
330 | sprintf(logPath,"%s/exec_status_%s",tmpPath->value,jobid->value); |
---|
331 | dumpMapsToFile(tmpMaps,logPath,0); |
---|
332 | char *sname=(char*)malloc((strlen(tmpPath->value)+strlen(jobid->value)+21)); |
---|
333 | sprintf(sname,"%s/.wait_socket_%s.sock",tmpPath->value,jobid->value); |
---|
334 | if ( (fd = socket(AF_UNIX, SOCK_STREAM, 0)) == -1) { |
---|
335 | perror("socket error"); |
---|
336 | setMapInMaps(conf,"lenv","message",_("Socket error")); |
---|
337 | return SERVICE_FAILED; |
---|
338 | } |
---|
339 | memset(&addr, 0, sizeof(addr)); |
---|
340 | addr.sun_family = AF_UNIX; |
---|
341 | strncpy(addr.sun_path, sname, sizeof(addr.sun_path)-1); |
---|
342 | if (connect(fd, (struct sockaddr*)&addr, sizeof(addr)) == -1) { |
---|
343 | perror("connect error"); |
---|
344 | setMapInMaps(conf,"lenv","message",_("Unable to connect")); |
---|
345 | return SERVICE_FAILED; |
---|
346 | } |
---|
347 | if (write(fd, "3", 1) != rc) { |
---|
348 | if (rc < 0) { |
---|
349 | perror("write error"); |
---|
350 | setMapInMaps(conf,"lenv","message",_("Unable to announce the successful execution of the HPC service")); |
---|
351 | close(fd); |
---|
352 | return SERVICE_FAILED; |
---|
353 | } |
---|
354 | } |
---|
355 | close(fd); |
---|
356 | unlink(flenv); |
---|
357 | free(flenv); |
---|
358 | setOutputValue(outputs,"Result",(char*)"\"FinalizeHPC run successfully\"",32); |
---|
359 | |
---|
360 | return SERVICE_SUCCEEDED; |
---|
361 | } |
---|
362 | |
---|
363 | } |
---|