001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.net.examples.mail; 019 020import java.io.BufferedWriter; 021import java.io.File; 022import java.io.FileWriter; 023import java.io.IOException; 024import java.net.URI; 025import java.net.URISyntaxException; 026import java.text.ParseException; 027import java.text.SimpleDateFormat; 028import java.util.ArrayList; 029import java.util.Date; 030import java.util.Iterator; 031import java.util.List; 032import java.util.TimeZone; 033import java.util.concurrent.atomic.AtomicInteger; 034import java.util.regex.Matcher; 035import java.util.regex.Pattern; 036 037import org.apache.commons.net.PrintCommandListener; 038import org.apache.commons.net.ProtocolCommandEvent; 039import org.apache.commons.net.imap.IMAP; 040import org.apache.commons.net.imap.IMAP.IMAPChunkListener; 041import org.apache.commons.net.imap.IMAPClient; 042import org.apache.commons.net.imap.IMAPReply; 043 044/** 045 * This is an example program demonstrating how to use the IMAP[S]Client class. This program connects to a IMAP[S] server and exports selected messages from a 046 * folder into an mbox file. 047 * <p> 048 * Usage: IMAPExportMbox imap[s]://user:password@host[:port]/folder/path <mboxfile> [sequence-set] [item-names] 049 * <p> 050 * An example sequence-set might be: 051 * <ul> 052 * <li>11,2,3:10,20:*</li> 053 * <li>1:* - this is the default</li> 054 * </ul> 055 * <p> 056 * Some example item-names might be: 057 * <ul> 058 * <li>BODY.PEEK[HEADER]</li> 059 * <li>'BODY.PEEK[HEADER.FIELDS (SUBJECT)]'</li> 060 * <li>ALL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE)'</li> 061 * <li>FAST - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE)'</li> 062 * <li>FULL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE BODY)'</li> 063 * <li>ENVELOPE X-GM-LABELS</li> 064 * <li>'(INTERNALDATE BODY.PEEK[])' - this is the default</li> 065 * </ul> 066 * <p> 067 * Macro names cannot be combined with anything else; they must be used alone.<br> 068 * Note that using BODY will set the \Seen flag. This is why the default uses BODY.PEEK[].<br> 069 * The item name X-GM-LABELS is a Google Mail extension; it shows the labels for a message.<br> 070 * For example:<br> 071 * IMAPExportMbox imaps://username:password@imap.googlemail.com/messages_for_export exported.mbox 1:10,20<br> 072 * IMAPExportMbox imaps://username:password@imap.googlemail.com/messages_for_export exported.mbox 3 ENVELOPE X-GM-LABELS<br> 073 * <p> 074 * The sequence-set is passed unmodified to the FETCH command.<br> 075 * The item names are wrapped in parentheses if more than one is provided. Otherwise, the parameter is assumed to be wrapped if necessary.<br> 076 * Parameters with spaces must be quoted otherwise the OS shell will normally treat them as separate parameters.<br> 077 * Also the listener that writes the mailbox only captures the multi-line responses (e.g. ones that include BODY references). It does not capture the output 078 * from FETCH commands using item names such as ENVELOPE or FLAGS that return a single line response. 079 */ 080public final class IMAPExportMbox { 081 082 private static class MboxListener implements IMAPChunkListener { 083 084 private final BufferedWriter bufferedWriter; 085 volatile AtomicInteger total = new AtomicInteger(); 086 volatile String lastFetched; 087 volatile List<String> missingIds = new ArrayList<>(); 088 volatile long lastSeq = -1; 089 private final String lineSeparator; 090 private final SimpleDateFormat DATE_FORMAT // for mbox From_ lines 091 = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy"); 092 093 // e.g. INTERNALDATE "27-Oct-2013 07:43:24 +0000" 094 // for parsing INTERNALDATE 095 private final SimpleDateFormat IDPARSE = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss Z"); 096 private final boolean printHash; 097 private final boolean printMarker; 098 private final boolean checkSequence; 099 100 MboxListener(final BufferedWriter bufferedWriter, final String lineSeparator, final boolean printHash, final boolean printMarker, 101 final boolean checkSequence) { 102 this.lineSeparator = lineSeparator; 103 this.printHash = printHash; 104 this.printMarker = printMarker; 105 DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("GMT")); 106 this.bufferedWriter = bufferedWriter; 107 this.checkSequence = checkSequence; 108 } 109 110 @Override 111 public boolean chunkReceived(final IMAP imap) { 112 final String[] replyStrings = imap.getReplyStrings(); 113 Date received = new Date(); 114 final String firstLine = replyStrings[0]; 115 Matcher m = PATID.matcher(firstLine); 116 if (m.lookingAt()) { // found a match 117 final String date = m.group(PATID_DATE_GROUP); 118 try { 119 received = IDPARSE.parse(date); 120 } catch (final ParseException e) { 121 System.err.println(e); 122 } 123 } else { 124 System.err.println("No timestamp found in: " + firstLine + " - using current time"); 125 } 126 String replyTo = "MAILER-DAEMON"; // default 127 for (int i = 1; i < replyStrings.length - 1; i++) { 128 final String line = replyStrings[i]; 129 if (line.startsWith("Return-Path: ")) { 130 final String[] parts = line.split(" ", 2); 131 if (!parts[1].equals("<>")) {// Don't replace default with blank 132 replyTo = parts[1]; 133 if (replyTo.startsWith("<")) { 134 if (replyTo.endsWith(">")) { 135 replyTo = replyTo.substring(1, replyTo.length() - 1); // drop <> wrapper 136 } else { 137 System.err.println("Unexpected Return-path: '" + line + "' in " + firstLine); 138 } 139 } 140 } 141 break; 142 } 143 } 144 try { 145 // Add initial mbox header line 146 bufferedWriter.append("From "); 147 bufferedWriter.append(replyTo); 148 bufferedWriter.append(' '); 149 bufferedWriter.append(DATE_FORMAT.format(received)); 150 bufferedWriter.append(lineSeparator); 151 // Debug 152 bufferedWriter.append("X-IMAP-Response: ").append(firstLine).append(lineSeparator); 153 if (printMarker) { 154 System.err.println("[" + total + "] " + firstLine); 155 } 156 // Skip first and last lines 157 for (int i = 1; i < replyStrings.length - 1; i++) { 158 final String line = replyStrings[i]; 159 if (startsWith(line, PATFROM)) { 160 bufferedWriter.append('>'); // Escape a From_ line 161 } 162 bufferedWriter.append(line); 163 bufferedWriter.append(lineSeparator); 164 } 165 // The last line ends with the trailing closing ")" which needs to be stripped 166 final String lastLine = replyStrings[replyStrings.length - 1]; 167 final int lastLength = lastLine.length(); 168 if (lastLength > 1) { // there's some content, we need to save it 169 bufferedWriter.append(lastLine, 0, lastLength - 1); 170 bufferedWriter.append(lineSeparator); 171 } 172 bufferedWriter.append(lineSeparator); // blank line between entries 173 } catch (final IOException e) { 174 e.printStackTrace(); 175 throw new RuntimeException(e); // chunkReceived cannot throw a checked Exception 176 } 177 lastFetched = firstLine; 178 total.incrementAndGet(); 179 if (checkSequence) { 180 m = PATSEQ.matcher(firstLine); 181 if (m.lookingAt()) { // found a match 182 final long msgSeq = Long.parseLong(m.group(PATSEQ_SEQUENCE_GROUP)); // Cannot fail to parse 183 if (lastSeq != -1) { 184 final long missing = msgSeq - lastSeq - 1; 185 if (missing != 0) { 186 for (long j = lastSeq + 1; j < msgSeq; j++) { 187 missingIds.add(String.valueOf(j)); 188 } 189 System.err.println("*** Sequence error: current=" + msgSeq + " previous=" + lastSeq + " Missing=" + missing); 190 } 191 } 192 lastSeq = msgSeq; 193 } 194 } 195 if (printHash) { 196 System.err.print("."); 197 } 198 return true; 199 } 200 201 public void close() throws IOException { 202 if (bufferedWriter != null) { 203 bufferedWriter.close(); 204 } 205 } 206 } 207 208 private static final String CRLF = "\r\n"; 209 private static final String LF = "\n"; 210 211 private static final String EOL_DEFAULT = System.lineSeparator(); 212 private static final Pattern PATFROM = Pattern.compile(">*From "); // unescaped From_ 213 // e.g. * nnn (INTERNALDATE "27-Oct-2013 07:43:24 +0000" BODY[] {nn} ...) 214 private static final Pattern PATID = // INTERNALDATE 215 Pattern.compile(".*INTERNALDATE \"(\\d\\d-\\w{3}-\\d{4} \\d\\d:\\d\\d:\\d\\d [+-]\\d+)\""); 216 217 private static final int PATID_DATE_GROUP = 1; 218 private static final Pattern PATSEQ = Pattern.compile("\\* (\\d+) "); // Sequence number 219 220 private static final int PATSEQ_SEQUENCE_GROUP = 1; 221 222 // e.g. * 382 EXISTS 223 private static final Pattern PATEXISTS = Pattern.compile("\\* (\\d+) EXISTS"); // Response from SELECT 224 225 // AAAC NO [TEMPFAIL] FETCH Temporary failure on server [CODE: WBL] 226 private static final Pattern PATTEMPFAIL = Pattern.compile("[A-Z]{4} NO \\[TEMPFAIL\\] FETCH .*"); 227 private static final int CONNECT_TIMEOUT = 10; // Seconds 228 229 private static final int READ_TIMEOUT = 10; 230 231 public static void main(final String[] args) throws IOException, URISyntaxException { 232 int connect_timeout = CONNECT_TIMEOUT; 233 int read_timeout = READ_TIMEOUT; 234 235 int argIdx = 0; 236 String eol = EOL_DEFAULT; 237 boolean printHash = false; 238 boolean printMarker = false; 239 int retryWaitSecs = 0; 240 241 for (argIdx = 0; argIdx < args.length; argIdx++) { 242 if (args[argIdx].equals("-c")) { 243 connect_timeout = Integer.parseInt(args[++argIdx]); 244 } else if (args[argIdx].equals("-r")) { 245 read_timeout = Integer.parseInt(args[++argIdx]); 246 } else if (args[argIdx].equals("-R")) { 247 retryWaitSecs = Integer.parseInt(args[++argIdx]); 248 } else if (args[argIdx].equals("-LF")) { 249 eol = LF; 250 } else if (args[argIdx].equals("-CRLF")) { 251 eol = CRLF; 252 } else if (args[argIdx].equals("-.")) { 253 printHash = true; 254 } else if (args[argIdx].equals("-X")) { 255 printMarker = true; 256 } else { 257 break; 258 } 259 } 260 261 final int argCount = args.length - argIdx; 262 263 if (argCount < 2) { 264 System.err.println("Usage: IMAPExportMbox [-LF|-CRLF] [-c n] [-r n] [-R n] [-.] [-X]" 265 + " imap[s]://user:password@host[:port]/folder/path [+|-]<mboxfile> [sequence-set] [itemnames]"); 266 System.err.println("\t-LF | -CRLF set end-of-line to LF or CRLF (default is the line.separator system property)"); 267 System.err.println("\t-c connect timeout in seconds (default 10)"); 268 System.err.println("\t-r read timeout in seconds (default 10)"); 269 System.err.println("\t-R temporary failure retry wait in seconds (default 0; i.e. disabled)"); 270 System.err.println("\t-. print a . for each complete message received"); 271 System.err.println("\t-X print the X-IMAP line for each complete message received"); 272 System.err.println("\tthe mboxfile is where the messages are stored; use '-' to write to standard output."); 273 System.err.println("\tPrefix file name with '+' to append to the file. Prefix with '-' to allow overwrite."); 274 System.err.println("\ta sequence-set is a list of numbers/number ranges e.g. 1,2,3-10,20:* - default 1:*"); 275 System.err.println("\titemnames are the message data item name(s) e.g. BODY.PEEK[HEADER.FIELDS (SUBJECT)]" 276 + " or a macro e.g. ALL - default (INTERNALDATE BODY.PEEK[])"); 277 System.exit(1); 278 } 279 280 final String uriString = args[argIdx++]; 281 URI uri; 282 try { 283 uri = URI.create(uriString); 284 } catch (final IllegalArgumentException e) { // cannot parse the path as is; let's pull it apart and try again 285 final Matcher m = Pattern.compile("(imaps?://[^/]+)(/.*)").matcher(uriString); 286 if (!m.matches()) { 287 throw e; 288 } 289 uri = URI.create(m.group(1)); // Just the scheme and auth parts 290 uri = new URI(uri.getScheme(), uri.getAuthority(), m.group(2), null, null); 291 } 292 final String file = args[argIdx++]; 293 String sequenceSet = argCount > 2 ? args[argIdx++] : "1:*"; 294 final String itemNames; 295 // Handle 0, 1 or multiple item names 296 if (argCount > 3) { 297 if (argCount > 4) { 298 final StringBuilder sb = new StringBuilder(); 299 sb.append("("); 300 for (int i = 4; i <= argCount; i++) { 301 if (i > 4) { 302 sb.append(" "); 303 } 304 sb.append(args[argIdx++]); 305 } 306 sb.append(")"); 307 itemNames = sb.toString(); 308 } else { 309 itemNames = args[argIdx++]; 310 } 311 } else { 312 itemNames = "(INTERNALDATE BODY.PEEK[])"; 313 } 314 315 final boolean checkSequence = sequenceSet.matches("\\d+:(\\d+|\\*)"); // are we expecting a sequence? 316 final MboxListener mboxListener; 317 if (file.equals("-")) { 318 mboxListener = null; 319 } else if (file.startsWith("+")) { 320 final File mbox = new File(file.substring(1)); 321 System.out.println("Appending to file " + mbox); 322 mboxListener = new MboxListener(new BufferedWriter(new FileWriter(mbox, true)), eol, printHash, printMarker, checkSequence); 323 } else if (file.startsWith("-")) { 324 final File mbox = new File(file.substring(1)); 325 System.out.println("Writing to file " + mbox); 326 mboxListener = new MboxListener(new BufferedWriter(new FileWriter(mbox, false)), eol, printHash, printMarker, checkSequence); 327 } else { 328 final File mboxFile = new File(file); 329 if (mboxFile.exists() && mboxFile.length() > 0) { 330 throw new IOException("mailbox file: " + mboxFile + " already exists and is non-empty!"); 331 } 332 System.out.println("Creating file " + mboxFile); 333 mboxListener = new MboxListener(new BufferedWriter(new FileWriter(mboxFile)), eol, printHash, printMarker, checkSequence); 334 } 335 336 final String path = uri.getPath(); 337 if (path == null || path.length() < 1) { 338 throw new IllegalArgumentException("Invalid folderPath: '" + path + "'"); 339 } 340 final String folder = path.substring(1); // skip the leading / 341 342 // suppress login details 343 final PrintCommandListener listener = new PrintCommandListener(System.out, true) { 344 @Override 345 public void protocolReplyReceived(final ProtocolCommandEvent event) { 346 if (event.getReplyCode() != IMAPReply.PARTIAL) { // This is dealt with by the chunk listener 347 super.protocolReplyReceived(event); 348 } 349 } 350 }; 351 352 // Connect and login 353 final IMAPClient imap = IMAPUtils.imapLogin(uri, connect_timeout * 1000, listener); 354 355 String maxIndexInFolder = null; 356 357 try { 358 359 imap.setSoTimeout(read_timeout * 1000); 360 361 if (!imap.select(folder)) { 362 throw new IOException("Could not select folder: " + folder); 363 } 364 365 for (final String line : imap.getReplyStrings()) { 366 maxIndexInFolder = matches(line, PATEXISTS, 1); 367 if (maxIndexInFolder != null) { 368 break; 369 } 370 } 371 372 if (mboxListener != null) { 373 imap.setChunkListener(mboxListener); 374 } // else the command listener displays the full output without processing 375 376 while (true) { 377 final boolean ok = imap.fetch(sequenceSet, itemNames); 378 // If the fetch failed, can we retry? 379 if (ok || (retryWaitSecs <= 0) || (mboxListener == null) || !checkSequence) { 380 break; 381 } 382 final String replyString = imap.getReplyString(); // includes EOL 383 if (!startsWith(replyString, PATTEMPFAIL)) { 384 throw new IOException("FETCH " + sequenceSet + " " + itemNames + " failed with " + replyString); 385 } 386 System.err.println("Temporary error detected, will retry in " + retryWaitSecs + "seconds"); 387 sequenceSet = mboxListener.lastSeq + 1 + ":*"; 388 try { 389 Thread.sleep(retryWaitSecs * 1000); 390 } catch (final InterruptedException e) { 391 // ignored 392 } 393 } 394 395 } catch (final IOException ioe) { 396 final String count = mboxListener == null ? "?" : mboxListener.total.toString(); 397 System.err.println("FETCH " + sequenceSet + " " + itemNames + " failed after processing " + count + " complete messages "); 398 if (mboxListener != null) { 399 System.err.println("Last complete response seen: " + mboxListener.lastFetched); 400 } 401 throw ioe; 402 } finally { 403 404 if (printHash) { 405 System.err.println(); 406 } 407 408 if (mboxListener != null) { 409 mboxListener.close(); 410 final Iterator<String> missingIds = mboxListener.missingIds.iterator(); 411 if (missingIds.hasNext()) { 412 final StringBuilder sb = new StringBuilder(); 413 for (;;) { 414 sb.append(missingIds.next()); 415 if (!missingIds.hasNext()) { 416 break; 417 } 418 sb.append(","); 419 } 420 System.err.println("*** Missing ids: " + sb.toString()); 421 } 422 } 423 imap.logout(); 424 imap.disconnect(); 425 } 426 if (mboxListener != null) { 427 System.out.println("Processed " + mboxListener.total + " messages."); 428 } 429 if (maxIndexInFolder != null) { 430 System.out.println("Folder contained " + maxIndexInFolder + " messages."); 431 } 432 } 433 434 private static String matches(final String input, final Pattern pat, final int index) { 435 final Matcher m = pat.matcher(input); 436 if (m.lookingAt()) { 437 return m.group(index); 438 } 439 return null; 440 } 441 442 private static boolean startsWith(final String input, final Pattern pat) { 443 final Matcher m = pat.matcher(input); 444 return m.lookingAt(); 445 } 446}