001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.net.nntp;
019
020/**
021 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski.
022 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details.
023 * For his Java implementation, see
024 * <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java">
025 * http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a>
026 */
027
028import java.util.Arrays;
029import java.util.HashMap;
030import java.util.List;
031import java.util.Map;
032
033public class Threader {
034
035    /**
036     *
037     * @param threadable
038     * @param idTable
039     */
040    private void buildContainer(final Threadable threadable, final HashMap<String, ThreadContainer> idTable) {
041        String id = threadable.messageThreadId();
042        ThreadContainer container = idTable.get(id);
043        int bogusIdCount = 0;
044
045        // A ThreadContainer exists for this id already. This should be a forward reference, but may
046        // be a duplicate id, in which case we will need to generate a bogus placeholder id
047        if (container != null) {
048            if (container.threadable != null) { // oops! duplicate ids...
049                bogusIdCount++; // Avoid dead local store warning
050                id = "<Bogus-id:" + (bogusIdCount) + ">";
051                container = null;
052            } else {
053                // The container just contained a forward reference to this message, so let's
054                // fill in the threadable field of the container with this message
055                container.threadable = threadable;
056            }
057        }
058
059        // No container exists for that message Id. Create one and insert it into the hash table.
060        if (container == null) {
061            container = new ThreadContainer();
062            container.threadable = threadable;
063            idTable.put(id, container);
064        }
065
066        // Iterate through all of the references and create ThreadContainers for any references that
067        // don't have them.
068        ThreadContainer parentRef = null;
069        {
070            final String[] references = threadable.messageThreadReferences();
071            for (final String refString : references) {
072                ThreadContainer ref = idTable.get(refString);
073
074                // if this id doesnt have a container, create one
075                if (ref == null) {
076                    ref = new ThreadContainer();
077                    idTable.put(refString, ref);
078                }
079
080                // Link references together in the order they appear in the References: header,
081                // IF they dont have a have a parent already &&
082                // IF it will not cause a circular reference
083                if ((parentRef != null) && (ref.parent == null) && (parentRef != ref) && !(ref.findChild(parentRef))) {
084                    // Link ref into the parent's child list
085                    ref.parent = parentRef;
086                    ref.next = parentRef.child;
087                    parentRef.child = ref;
088                }
089                parentRef = ref;
090            }
091        }
092
093        // parentRef is now set to the container of the last element in the references field. make that
094        // be the parent of this container, unless doing so causes a circular reference
095        if (parentRef != null && (parentRef == container || container.findChild(parentRef))) {
096            parentRef = null;
097        }
098
099        // if it has a parent already, its because we saw this message in a References: field, and presumed
100        // a parent based on the other entries in that field. Now that we have the actual message, we can
101        // throw away the old parent and use this new one
102        if (container.parent != null) {
103            ThreadContainer rest, prev;
104
105            for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) {
106                if (rest == container) {
107                    break;
108                }
109            }
110
111            if (rest == null) {
112                throw new RuntimeException("Didnt find " + container + " in parent" + container.parent);
113            }
114
115            // Unlink this container from the parent's child list
116            if (prev == null) {
117                container.parent.child = container.next;
118            } else {
119                prev.next = container.next;
120            }
121
122            container.next = null;
123            container.parent = null;
124        }
125
126        // If we have a parent, link container into the parents child list
127        if (parentRef != null) {
128            container.parent = parentRef;
129            container.next = parentRef.child;
130            parentRef.child = container;
131        }
132    }
133
134    /**
135     * Find the root set of all existing ThreadContainers
136     *
137     * @param idTable
138     * @return root the ThreadContainer representing the root node
139     */
140    private ThreadContainer findRootSet(final HashMap<String, ThreadContainer> idTable) {
141        final ThreadContainer root = new ThreadContainer();
142        for (final Map.Entry<String, ThreadContainer> entry : idTable.entrySet()) {
143            final ThreadContainer c = entry.getValue();
144            if (c.parent == null) {
145                if (c.next != null) {
146                    throw new RuntimeException("c.next is " + c.next.toString());
147                }
148                c.next = root.child;
149                root.child = c;
150            }
151        }
152        return root;
153    }
154
155    /**
156     * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers.
157     *
158     * @param root
159     */
160    private void gatherSubjects(final ThreadContainer root) {
161
162        int count = 0;
163
164        for (ThreadContainer c = root.child; c != null; c = c.next) {
165            count++;
166        }
167
168        // TODO verify this will avoid rehashing
169        HashMap<String, ThreadContainer> subjectTable = new HashMap<>((int) (count * 1.2), (float) 0.9);
170        count = 0;
171
172        for (ThreadContainer c = root.child; c != null; c = c.next) {
173            Threadable threadable = c.threadable;
174
175            // No threadable? If so, it is a dummy node in the root set.
176            // Only root set members may be dummies, and they alway have at least 2 kids
177            // Take the first kid as representative of the subject
178            if (threadable == null) {
179                threadable = c.child.threadable;
180            }
181
182            final String subj = threadable.simplifiedSubject();
183
184            if (subj == null || subj.isEmpty()) {
185                continue;
186            }
187
188            final ThreadContainer old = subjectTable.get(subj);
189
190            // Add this container to the table iff:
191            // - There exists no container with this subject
192            // - or this is a dummy container and the old one is not - the dummy one is
193            // more interesting as a root, so put it in the table instead
194            // - The container in the table has a "Re:" version of this subject, and
195            // this container has a non-"Re:" version of this subject. The non-"Re:" version
196            // is the more interesting of the two.
197            if (old == null || (c.threadable == null && old.threadable != null)
198                    || (old.threadable != null && old.threadable.subjectIsReply() && c.threadable != null && !c.threadable.subjectIsReply())) {
199                subjectTable.put(subj, c);
200                count++;
201            }
202        }
203
204        // If the table is empty, we're done
205        if (count == 0) {
206            return;
207        }
208
209        // subjectTable is now populated with one entry for each subject which occurs in the
210        // root set. Iterate over the root set, and gather together the difference.
211        ThreadContainer prev, c, rest;
212        for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = (rest == null ? null : rest.next)) {
213            Threadable threadable = c.threadable;
214
215            // is it a dummy node?
216            if (threadable == null) {
217                threadable = c.child.threadable;
218            }
219
220            final String subj = threadable.simplifiedSubject();
221
222            // Dont thread together all subjectless messages
223            if (subj == null || subj.isEmpty()) {
224                continue;
225            }
226
227            final ThreadContainer old = subjectTable.get(subj);
228
229            if (old == c) { // That's us
230                continue;
231            }
232
233            // We have now found another container in the root set with the same subject
234            // Remove the "second" message from the root set
235            if (prev == null) {
236                root.child = c.next;
237            } else {
238                prev.next = c.next;
239            }
240            c.next = null;
241
242            if (old.threadable == null && c.threadable == null) {
243                // both dummies - merge them
244                ThreadContainer tail;
245                for (tail = old.child; tail != null && tail.next != null; tail = tail.next) {
246                    // do nothing
247                }
248
249                if (tail != null) { // protect against possible NPE
250                    tail.next = c.child;
251                }
252
253                for (tail = c.child; tail != null; tail = tail.next) {
254                    tail.parent = old;
255                }
256
257                c.child = null;
258            } else if (old.threadable == null || (c.threadable != null && c.threadable.subjectIsReply() && !old.threadable.subjectIsReply())) {
259                // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old
260                c.parent = old;
261                c.next = old.child;
262                old.child = c;
263            } else {
264                // else make the old and new messages be children of a new dummy container.
265                // We create a new container object for old.msg and empty the old container
266                final ThreadContainer newc = new ThreadContainer();
267                newc.threadable = old.threadable;
268                newc.child = old.child;
269
270                for (ThreadContainer tail = newc.child; tail != null; tail = tail.next) {
271                    tail.parent = newc;
272                }
273
274                old.threadable = null;
275                old.child = null;
276
277                c.parent = old;
278                newc.parent = old;
279
280                // Old is now a dummy- give it 2 kids , c and newc
281                old.child = c;
282                c.next = newc;
283            }
284            // We've done a merge, so keep the same prev
285            c = prev;
286        }
287
288        subjectTable.clear();
289        subjectTable = null;
290
291    }
292
293    /**
294     * Delete any empty or dummy ThreadContainers
295     *
296     * @param parent
297     */
298    private void pruneEmptyContainers(final ThreadContainer parent) {
299        ThreadContainer container, prev, next;
300        for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = (container == null
301                ? null
302                : container.next)) {
303
304            // Is it empty and without any children? If so,delete it
305            if (container.threadable == null && container.child == null) {
306                if (prev == null) {
307                    parent.child = container.next;
308                } else {
309                    prev.next = container.next;
310                }
311
312                // Set container to prev so that prev keeps its same value the next time through the loop
313                container = prev;
314            }
315
316            // Else if empty, with kids, and (not at root or only one kid)
317            else if (container.threadable == null && (container.parent != null || container.child.next == null)) {
318                // We have an invalid/expired message with kids. Promote the kids to this level.
319                ThreadContainer tail;
320                final ThreadContainer kids = container.child;
321
322                // Remove this container and replace with 'kids'.
323                if (prev == null) {
324                    parent.child = kids;
325                } else {
326                    prev.next = kids;
327                }
328
329                // Make each child's parent be this level's parent -> i.e. promote the children.
330                // Make the last child's next point to this container's next
331                // i.e. splice kids into the list in place of container
332                for (tail = kids; tail.next != null; tail = tail.next) {
333                    tail.parent = container.parent;
334                }
335
336                tail.parent = container.parent;
337                tail.next = container.next;
338
339                // next currently points to the item after the inserted items in the chain - reset that so we process the newly
340                // promoted items next time round
341                next = kids;
342
343                // Set container to prev so that prev keeps its same value the next time through the loop
344                container = prev;
345            } else if (container.child != null) {
346                // A real message , with kids
347                // Iterate over the children
348                pruneEmptyContainers(container);
349            }
350        }
351    }
352
353    /**
354     * The client passes in a list of Iterable objects, and the Threader constructs a connected 'graph' of messages
355     *
356     * @param messages iterable of messages to thread, must not be empty
357     * @return null if messages == null or root.child == null or messages list is empty
358     * @since 3.0
359     */
360    public Threadable thread(final Iterable<? extends Threadable> messages) {
361        if (messages == null) {
362            return null;
363        }
364
365        HashMap<String, ThreadContainer> idTable = new HashMap<>();
366
367        // walk through each Threadable element
368        for (final Threadable t : messages) {
369            if (!t.isDummy()) {
370                buildContainer(t, idTable);
371            }
372        }
373
374        if (idTable.isEmpty()) {
375            return null;
376        }
377
378        final ThreadContainer root = findRootSet(idTable);
379        idTable.clear();
380        idTable = null;
381
382        pruneEmptyContainers(root);
383
384        root.reverseChildren();
385        gatherSubjects(root);
386
387        if (root.next != null) {
388            throw new RuntimeException("root node has a next:" + root);
389        }
390
391        for (ThreadContainer r = root.child; r != null; r = r.next) {
392            if (r.threadable == null) {
393                r.threadable = r.child.threadable.makeDummy();
394            }
395        }
396
397        final Threadable result = (root.child == null ? null : root.child.threadable);
398        root.flush();
399
400        return result;
401    }
402
403    /**
404     * The client passes in a list of Threadable objects, and the Threader constructs a connected 'graph' of messages
405     *
406     * @param messages list of messages to thread, must not be empty
407     * @return null if messages == null or root.child == null or messages list is empty
408     * @since 2.2
409     */
410    public Threadable thread(final List<? extends Threadable> messages) {
411        return thread((Iterable<? extends Threadable>) messages);
412    }
413
414    // DEPRECATED METHODS - for API compatibility only - DO NOT USE
415
416    /**
417     * The client passes in an array of Threadable objects, and the Threader constructs a connected 'graph' of messages
418     *
419     * @param messages array of messages to thread, must not be empty
420     * @return null if messages == null or root.child == null or messages array is empty
421     * @deprecated (2.2) prefer {@link #thread(List)}
422     */
423    @Deprecated
424    public Threadable thread(final Threadable[] messages) {
425        if (messages == null) {
426            return null;
427        }
428        return thread(Arrays.asList(messages));
429    }
430
431}