001/* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018package org.apache.commons.net.nntp; 019 020/** 021 * This is an implementation of a message threading algorithm, as originally devised by Zamie Zawinski. 022 * See <a href="http://www.jwz.org/doc/threading.html">http://www.jwz.org/doc/threading.html</a> for details. 023 * For his Java implementation, see 024 * <a href="http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java"> 025 * http://lxr.mozilla.org/mozilla/source/grendel/sources/grendel/view/Threader.java</a> 026 */ 027 028import java.util.Arrays; 029import java.util.HashMap; 030import java.util.List; 031import java.util.Map; 032 033public class Threader { 034 035 /** 036 * 037 * @param threadable 038 * @param idTable 039 */ 040 private void buildContainer(final Threadable threadable, final HashMap<String, ThreadContainer> idTable) { 041 String id = threadable.messageThreadId(); 042 ThreadContainer container = idTable.get(id); 043 int bogusIdCount = 0; 044 045 // A ThreadContainer exists for this id already. This should be a forward reference, but may 046 // be a duplicate id, in which case we will need to generate a bogus placeholder id 047 if (container != null) { 048 if (container.threadable != null) { // oops! duplicate ids... 049 bogusIdCount++; // Avoid dead local store warning 050 id = "<Bogus-id:" + (bogusIdCount) + ">"; 051 container = null; 052 } else { 053 // The container just contained a forward reference to this message, so let's 054 // fill in the threadable field of the container with this message 055 container.threadable = threadable; 056 } 057 } 058 059 // No container exists for that message Id. Create one and insert it into the hash table. 060 if (container == null) { 061 container = new ThreadContainer(); 062 container.threadable = threadable; 063 idTable.put(id, container); 064 } 065 066 // Iterate through all of the references and create ThreadContainers for any references that 067 // don't have them. 068 ThreadContainer parentRef = null; 069 { 070 final String[] references = threadable.messageThreadReferences(); 071 for (final String refString : references) { 072 ThreadContainer ref = idTable.get(refString); 073 074 // if this id doesnt have a container, create one 075 if (ref == null) { 076 ref = new ThreadContainer(); 077 idTable.put(refString, ref); 078 } 079 080 // Link references together in the order they appear in the References: header, 081 // IF they dont have a have a parent already && 082 // IF it will not cause a circular reference 083 if ((parentRef != null) && (ref.parent == null) && (parentRef != ref) && !(ref.findChild(parentRef))) { 084 // Link ref into the parent's child list 085 ref.parent = parentRef; 086 ref.next = parentRef.child; 087 parentRef.child = ref; 088 } 089 parentRef = ref; 090 } 091 } 092 093 // parentRef is now set to the container of the last element in the references field. make that 094 // be the parent of this container, unless doing so causes a circular reference 095 if (parentRef != null && (parentRef == container || container.findChild(parentRef))) { 096 parentRef = null; 097 } 098 099 // if it has a parent already, its because we saw this message in a References: field, and presumed 100 // a parent based on the other entries in that field. Now that we have the actual message, we can 101 // throw away the old parent and use this new one 102 if (container.parent != null) { 103 ThreadContainer rest, prev; 104 105 for (prev = null, rest = container.parent.child; rest != null; prev = rest, rest = rest.next) { 106 if (rest == container) { 107 break; 108 } 109 } 110 111 if (rest == null) { 112 throw new RuntimeException("Didnt find " + container + " in parent" + container.parent); 113 } 114 115 // Unlink this container from the parent's child list 116 if (prev == null) { 117 container.parent.child = container.next; 118 } else { 119 prev.next = container.next; 120 } 121 122 container.next = null; 123 container.parent = null; 124 } 125 126 // If we have a parent, link container into the parents child list 127 if (parentRef != null) { 128 container.parent = parentRef; 129 container.next = parentRef.child; 130 parentRef.child = container; 131 } 132 } 133 134 /** 135 * Find the root set of all existing ThreadContainers 136 * 137 * @param idTable 138 * @return root the ThreadContainer representing the root node 139 */ 140 private ThreadContainer findRootSet(final HashMap<String, ThreadContainer> idTable) { 141 final ThreadContainer root = new ThreadContainer(); 142 for (final Map.Entry<String, ThreadContainer> entry : idTable.entrySet()) { 143 final ThreadContainer c = entry.getValue(); 144 if (c.parent == null) { 145 if (c.next != null) { 146 throw new RuntimeException("c.next is " + c.next.toString()); 147 } 148 c.next = root.child; 149 root.child = c; 150 } 151 } 152 return root; 153 } 154 155 /** 156 * If any two members of the root set have the same subject, merge them. This is to attempt to accomodate messages without References: headers. 157 * 158 * @param root 159 */ 160 private void gatherSubjects(final ThreadContainer root) { 161 162 int count = 0; 163 164 for (ThreadContainer c = root.child; c != null; c = c.next) { 165 count++; 166 } 167 168 // TODO verify this will avoid rehashing 169 HashMap<String, ThreadContainer> subjectTable = new HashMap<>((int) (count * 1.2), (float) 0.9); 170 count = 0; 171 172 for (ThreadContainer c = root.child; c != null; c = c.next) { 173 Threadable threadable = c.threadable; 174 175 // No threadable? If so, it is a dummy node in the root set. 176 // Only root set members may be dummies, and they alway have at least 2 kids 177 // Take the first kid as representative of the subject 178 if (threadable == null) { 179 threadable = c.child.threadable; 180 } 181 182 final String subj = threadable.simplifiedSubject(); 183 184 if (subj == null || subj.isEmpty()) { 185 continue; 186 } 187 188 final ThreadContainer old = subjectTable.get(subj); 189 190 // Add this container to the table iff: 191 // - There exists no container with this subject 192 // - or this is a dummy container and the old one is not - the dummy one is 193 // more interesting as a root, so put it in the table instead 194 // - The container in the table has a "Re:" version of this subject, and 195 // this container has a non-"Re:" version of this subject. The non-"Re:" version 196 // is the more interesting of the two. 197 if (old == null || (c.threadable == null && old.threadable != null) 198 || (old.threadable != null && old.threadable.subjectIsReply() && c.threadable != null && !c.threadable.subjectIsReply())) { 199 subjectTable.put(subj, c); 200 count++; 201 } 202 } 203 204 // If the table is empty, we're done 205 if (count == 0) { 206 return; 207 } 208 209 // subjectTable is now populated with one entry for each subject which occurs in the 210 // root set. Iterate over the root set, and gather together the difference. 211 ThreadContainer prev, c, rest; 212 for (prev = null, c = root.child, rest = c.next; c != null; prev = c, c = rest, rest = (rest == null ? null : rest.next)) { 213 Threadable threadable = c.threadable; 214 215 // is it a dummy node? 216 if (threadable == null) { 217 threadable = c.child.threadable; 218 } 219 220 final String subj = threadable.simplifiedSubject(); 221 222 // Dont thread together all subjectless messages 223 if (subj == null || subj.isEmpty()) { 224 continue; 225 } 226 227 final ThreadContainer old = subjectTable.get(subj); 228 229 if (old == c) { // That's us 230 continue; 231 } 232 233 // We have now found another container in the root set with the same subject 234 // Remove the "second" message from the root set 235 if (prev == null) { 236 root.child = c.next; 237 } else { 238 prev.next = c.next; 239 } 240 c.next = null; 241 242 if (old.threadable == null && c.threadable == null) { 243 // both dummies - merge them 244 ThreadContainer tail; 245 for (tail = old.child; tail != null && tail.next != null; tail = tail.next) { 246 // do nothing 247 } 248 249 if (tail != null) { // protect against possible NPE 250 tail.next = c.child; 251 } 252 253 for (tail = c.child; tail != null; tail = tail.next) { 254 tail.parent = old; 255 } 256 257 c.child = null; 258 } else if (old.threadable == null || (c.threadable != null && c.threadable.subjectIsReply() && !old.threadable.subjectIsReply())) { 259 // Else if old is empty, or c has "Re:" and old does not ==> make this message a child of old 260 c.parent = old; 261 c.next = old.child; 262 old.child = c; 263 } else { 264 // else make the old and new messages be children of a new dummy container. 265 // We create a new container object for old.msg and empty the old container 266 final ThreadContainer newc = new ThreadContainer(); 267 newc.threadable = old.threadable; 268 newc.child = old.child; 269 270 for (ThreadContainer tail = newc.child; tail != null; tail = tail.next) { 271 tail.parent = newc; 272 } 273 274 old.threadable = null; 275 old.child = null; 276 277 c.parent = old; 278 newc.parent = old; 279 280 // Old is now a dummy- give it 2 kids , c and newc 281 old.child = c; 282 c.next = newc; 283 } 284 // We've done a merge, so keep the same prev 285 c = prev; 286 } 287 288 subjectTable.clear(); 289 subjectTable = null; 290 291 } 292 293 /** 294 * Delete any empty or dummy ThreadContainers 295 * 296 * @param parent 297 */ 298 private void pruneEmptyContainers(final ThreadContainer parent) { 299 ThreadContainer container, prev, next; 300 for (prev = null, container = parent.child, next = container.next; container != null; prev = container, container = next, next = (container == null 301 ? null 302 : container.next)) { 303 304 // Is it empty and without any children? If so,delete it 305 if (container.threadable == null && container.child == null) { 306 if (prev == null) { 307 parent.child = container.next; 308 } else { 309 prev.next = container.next; 310 } 311 312 // Set container to prev so that prev keeps its same value the next time through the loop 313 container = prev; 314 } 315 316 // Else if empty, with kids, and (not at root or only one kid) 317 else if (container.threadable == null && (container.parent != null || container.child.next == null)) { 318 // We have an invalid/expired message with kids. Promote the kids to this level. 319 ThreadContainer tail; 320 final ThreadContainer kids = container.child; 321 322 // Remove this container and replace with 'kids'. 323 if (prev == null) { 324 parent.child = kids; 325 } else { 326 prev.next = kids; 327 } 328 329 // Make each child's parent be this level's parent -> i.e. promote the children. 330 // Make the last child's next point to this container's next 331 // i.e. splice kids into the list in place of container 332 for (tail = kids; tail.next != null; tail = tail.next) { 333 tail.parent = container.parent; 334 } 335 336 tail.parent = container.parent; 337 tail.next = container.next; 338 339 // next currently points to the item after the inserted items in the chain - reset that so we process the newly 340 // promoted items next time round 341 next = kids; 342 343 // Set container to prev so that prev keeps its same value the next time through the loop 344 container = prev; 345 } else if (container.child != null) { 346 // A real message , with kids 347 // Iterate over the children 348 pruneEmptyContainers(container); 349 } 350 } 351 } 352 353 /** 354 * The client passes in a list of Iterable objects, and the Threader constructs a connected 'graph' of messages 355 * 356 * @param messages iterable of messages to thread, must not be empty 357 * @return null if messages == null or root.child == null or messages list is empty 358 * @since 3.0 359 */ 360 public Threadable thread(final Iterable<? extends Threadable> messages) { 361 if (messages == null) { 362 return null; 363 } 364 365 HashMap<String, ThreadContainer> idTable = new HashMap<>(); 366 367 // walk through each Threadable element 368 for (final Threadable t : messages) { 369 if (!t.isDummy()) { 370 buildContainer(t, idTable); 371 } 372 } 373 374 if (idTable.isEmpty()) { 375 return null; 376 } 377 378 final ThreadContainer root = findRootSet(idTable); 379 idTable.clear(); 380 idTable = null; 381 382 pruneEmptyContainers(root); 383 384 root.reverseChildren(); 385 gatherSubjects(root); 386 387 if (root.next != null) { 388 throw new RuntimeException("root node has a next:" + root); 389 } 390 391 for (ThreadContainer r = root.child; r != null; r = r.next) { 392 if (r.threadable == null) { 393 r.threadable = r.child.threadable.makeDummy(); 394 } 395 } 396 397 final Threadable result = (root.child == null ? null : root.child.threadable); 398 root.flush(); 399 400 return result; 401 } 402 403 /** 404 * The client passes in a list of Threadable objects, and the Threader constructs a connected 'graph' of messages 405 * 406 * @param messages list of messages to thread, must not be empty 407 * @return null if messages == null or root.child == null or messages list is empty 408 * @since 2.2 409 */ 410 public Threadable thread(final List<? extends Threadable> messages) { 411 return thread((Iterable<? extends Threadable>) messages); 412 } 413 414 // DEPRECATED METHODS - for API compatibility only - DO NOT USE 415 416 /** 417 * The client passes in an array of Threadable objects, and the Threader constructs a connected 'graph' of messages 418 * 419 * @param messages array of messages to thread, must not be empty 420 * @return null if messages == null or root.child == null or messages array is empty 421 * @deprecated (2.2) prefer {@link #thread(List)} 422 */ 423 @Deprecated 424 public Threadable thread(final Threadable[] messages) { 425 if (messages == null) { 426 return null; 427 } 428 return thread(Arrays.asList(messages)); 429 } 430 431}