001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.net.examples.mail;
019
020import java.io.BufferedReader;
021import java.io.File;
022import java.io.FileReader;
023import java.io.IOException;
024import java.net.URI;
025import java.util.ArrayList;
026import java.util.BitSet;
027import java.util.List;
028import java.util.regex.Matcher;
029import java.util.regex.Pattern;
030
031import org.apache.commons.net.imap.IMAPClient;
032
033/**
034 * This is an example program demonstrating how to use the IMAP[S]Client class. This program connects to a IMAP[S] server and imports messages into the folder
035 * from an mbox file.
036 * <p>
037 * Usage: IMAPImportMbox imap[s]://user:password@host[:port]/folder/path <mboxfile> [selectors]
038 * <p>
039 * An example selector might be:
040 * <ul>
041 * <li>1,2,3,7-10</li>
042 * <li>-142986- : this is useful for retrieving messages by apmail number, which appears as From xyz-return-142986-apmail-...</li>
043 * </ul>
044 * <p>
045 * For example:<br>
046 * IMAPImportMbox imaps://user:pass@imap.googlemail.com/imported_messages 201401.mbox 1-10,20 -142986-
047 */
048public final class IMAPImportMbox {
049
050    private static final String CRLF = "\r\n";
051    private static final Pattern PATFROM = Pattern.compile(">+From "); // escaped From
052
053    private static String getDate(final String msg) {
054        // From SENDER Fri Sep 13 17:04:01 2019
055        final Pattern FROM_RE = Pattern.compile("From \\S+ +\\S+ (\\S+)  ?(\\S+) (\\S+) (\\S+)");
056        // [Fri] Sep 13 HMS 2019
057        // output date: 13-Sep-2019 17:04:01 +0000
058        String date = null;
059        final Matcher m = FROM_RE.matcher(msg);
060        if (m.lookingAt()) {
061            date = m.group(2) + "-" + m.group(1) + "-" + m.group(4) + " " + m.group(3) + " +0000";
062        }
063        return date;
064    }
065
066    /**
067     * Is at least one entry in the list contained in the string?
068     *
069     * @param contains the list of strings to look for
070     * @param string   the String to check against
071     * @return true if at least one entry in the contains list is contained in the string
072     */
073    private static boolean listContains(final List<String> contains, final String string) {
074        for (final String entry : contains) {
075            if (string.contains(entry)) {
076                return true;
077            }
078        }
079        return false;
080    }
081
082    public static void main(final String[] args) throws IOException {
083        if (args.length < 2) {
084            System.err.println("Usage: IMAPImportMbox imap[s]://user:password@host[:port]/folder/path <mboxfile> [selectors]");
085            System.err
086                    .println("\tWhere: a selector is a list of numbers/number ranges - 1,2,3-10" + " - or a list of strings to match in the initial From line");
087            System.exit(1);
088        }
089
090        final URI uri = URI.create(args[0]);
091        final String file = args[1];
092
093        final File mbox = new File(file);
094        if (!mbox.isFile() || !mbox.canRead()) {
095            throw new IOException("Cannot read mailbox file: " + mbox);
096        }
097
098        final String path = uri.getPath();
099        if (path == null || path.length() < 1) {
100            throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
101        }
102        final String folder = path.substring(1); // skip the leading /
103
104        final List<String> contains = new ArrayList<>(); // list of strings to find
105        final BitSet msgNums = new BitSet(); // list of message numbers
106
107        for (int i = 2; i < args.length; i++) {
108            final String arg = args[i];
109            if (arg.matches("\\d+(-\\d+)?(,\\d+(-\\d+)?)*")) { // number,m-n
110                for (final String entry : arg.split(",")) {
111                    final String[] parts = entry.split("-");
112                    if (parts.length == 2) { // m-n
113                        final int low = Integer.parseInt(parts[0]);
114                        final int high = Integer.parseInt(parts[1]);
115                        for (int j = low; j <= high; j++) {
116                            msgNums.set(j);
117                        }
118                    } else {
119                        msgNums.set(Integer.parseInt(entry));
120                    }
121                }
122            } else {
123                contains.add(arg); // not a number/number range
124            }
125        }
126//        System.out.println(msgNums.toString());
127//        System.out.println(java.util.Arrays.toString(contains.toArray()));
128
129        // Connect and login
130        final IMAPClient imap = IMAPUtils.imapLogin(uri, 10000, null);
131
132        int total = 0;
133        int loaded = 0;
134        try {
135            imap.setSoTimeout(6000);
136
137            final BufferedReader br = new BufferedReader(new FileReader(file)); // TODO charset?
138
139            String line;
140            final StringBuilder sb = new StringBuilder();
141            boolean wanted = false; // Skip any leading rubbish
142            while ((line = br.readLine()) != null) {
143                if (line.startsWith("From ")) { // start of message; i.e. end of previous (if any)
144                    if (process(sb, imap, folder, total)) { // process previous message (if any)
145                        loaded++;
146                    }
147                    sb.setLength(0);
148                    total++;
149                    wanted = wanted(total, line, msgNums, contains);
150                } else if (startsWith(line, PATFROM)) { // Unescape ">+From " in body text
151                    line = line.substring(1);
152                }
153                // TODO process first Received: line to determine arrival date?
154                if (wanted) {
155                    sb.append(line);
156                    sb.append(CRLF);
157                }
158            }
159            br.close();
160            if (wanted && process(sb, imap, folder, total)) { // last message (if any)
161                loaded++;
162            }
163        } catch (final IOException e) {
164            System.out.println("Error processing msg: " + total + " " + imap.getReplyString());
165            e.printStackTrace();
166            System.exit(10);
167            return;
168        } finally {
169            imap.logout();
170            imap.disconnect();
171        }
172        System.out.println("Processed " + total + " messages, loaded " + loaded);
173    }
174
175    private static boolean process(final StringBuilder sb, final IMAPClient imap, final String folder, final int msgNum) throws IOException {
176        final int length = sb.length();
177        final boolean haveMessage = length > 2;
178        if (haveMessage) {
179            System.out.println("MsgNum: " + msgNum + " Length " + length);
180            sb.setLength(length - 2); // drop trailing CRLF (mbox format has trailing blank line)
181            final String msg = sb.toString();
182            if (!imap.append(folder, null, getDate(msg), msg)) {
183                throw new IOException("Failed to import message: " + msgNum + " " + imap.getReplyString());
184            }
185        }
186        return haveMessage;
187    }
188
189    private static boolean startsWith(final String input, final Pattern pat) {
190        final Matcher m = pat.matcher(input);
191        return m.lookingAt();
192    }
193
194    /**
195     * Is the message wanted?
196     *
197     * @param msgNum   the message number
198     * @param line     the From line
199     * @param msgNums  the list of wanted message numbers
200     * @param contains the list of strings to be contained
201     * @return true if the message is wanted
202     */
203    private static boolean wanted(final int msgNum, final String line, final BitSet msgNums, final List<String> contains) {
204        return (msgNums.isEmpty() && contains.isEmpty()) // no selectors
205                || msgNums.get(msgNum) // matches message number
206                || listContains(contains, line); // contains string
207    }
208
209}