From b38a7c1c4cbf6364da69dbb6010f1d379dcad733 Mon Sep 17 00:00:00 2001 From: spv Date: Mon, 26 Aug 2024 17:31:40 +0200 Subject: [PATCH] some improvements --- Cargo.lock | 131 +++++++++++++++++++++++++++++++++++++++++++++++ Cargo.toml | 2 + answers.json | 2 +- src/collector.rs | 10 +++- src/main.rs | 77 +++++++++++++++++++++++----- 5 files changed, 207 insertions(+), 15 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c9070e3..9faae19 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -26,6 +26,21 @@ dependencies = [ "memchr", ] +[[package]] +name = "android-tzdata" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" + +[[package]] +name = "android_system_properties" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "819e7219dbd41043ac279b19830f2efc897156490d7fd6ea916720117ee66311" +dependencies = [ + "libc", +] + [[package]] name = "anstream" version = "0.6.15" @@ -153,6 +168,18 @@ version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "chrono" +version = "0.4.38" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a21f936df1771bf62b77f047b726c4625ff2e8aa607c01ec06e5a05bd8463401" +dependencies = [ + "android-tzdata", + "iana-time-zone", + "num-traits", + "windows-targets", +] + [[package]] name = "clap" version = "4.5.16" @@ -199,6 +226,17 @@ version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d3fd119d74b830634cea2a0f58bbd0d54540518a14397557951e79340abc28c0" +[[package]] +name = "colored" +version = "1.9.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a5f741c91823341bebf717d4c71bda820630ce065443b58bd1b7451af008355" +dependencies = [ + "is-terminal", + "lazy_static", + "winapi", +] + [[package]] name = "cookie" version = "0.16.2" @@ -301,6 +339,17 @@ version = "2.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9fc0510504f03c51ada170672ac806f1f105a88aa97a5281117e1ddc3368e51a" +[[package]] +name = "fern" +version = "0.6.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d9f0c14694cbd524c8720dd69b0e3179344f04ebb5f90f2e4a440c6ea3b2f1ee" +dependencies = [ + "chrono", + "colored", + "log", +] + [[package]] name = "fnv" version = "1.0.7" @@ -488,6 +537,12 @@ version = "1.9.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fcc0b4a115bf80b728eb8ea024ad5bd707b615bfed49e0665b6e0f86fd082d9" +[[package]] +name = "humantime" +version = "2.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4" + [[package]] name = "hyper" version = "1.4.1" @@ -561,6 +616,29 @@ dependencies = [ "tracing", ] +[[package]] +name = "iana-time-zone" +version = "0.1.60" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7ffbb5a1b541ea2561f8c41c087286cc091e21e556a4f09a8f6cbf17b69b141" +dependencies = [ + "android_system_properties", + "core-foundation-sys", + "iana-time-zone-haiku", + "js-sys", + "wasm-bindgen", + "windows-core", +] + +[[package]] +name = "iana-time-zone-haiku" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f31827a206f56af32e590ba56d5d2d085f558508192593743f16b2306495269f" +dependencies = [ + "cc", +] + [[package]] name = "idna" version = "0.5.0" @@ -587,6 +665,17 @@ version = "2.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8f518f335dce6725a761382244631d86cf0ccb2863413590b31338feb467f9c3" +[[package]] +name = "is-terminal" +version = "0.4.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f23ff5ef2b80d608d61efee834934d862cd92461afc0560dedf493e4c033738b" +dependencies = [ + "hermit-abi", + "libc", + "windows-sys 0.52.0", +] + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -698,6 +787,15 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +[[package]] +name = "num-traits" +version = "0.2.19" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" +dependencies = [ + "autocfg", +] + [[package]] name = "object" version = "0.36.3" @@ -1144,6 +1242,8 @@ dependencies = [ "anyhow", "clap", "fantoccini", + "fern", + "humantime", "lazy_static", "log", "regex", @@ -1574,6 +1674,37 @@ dependencies = [ "url", ] +[[package]] +name = "winapi" +version = "0.3.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419" +dependencies = [ + "winapi-i686-pc-windows-gnu", + "winapi-x86_64-pc-windows-gnu", +] + +[[package]] +name = "winapi-i686-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" + +[[package]] +name = "winapi-x86_64-pc-windows-gnu" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" + +[[package]] +name = "windows-core" +version = "0.52.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" +dependencies = [ + "windows-targets", +] + [[package]] name = "windows-registry" version = "0.2.0" diff --git a/Cargo.toml b/Cargo.toml index 1047e8a..4aeb690 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -7,6 +7,8 @@ edition = "2021" anyhow = "1.0.86" clap = { version = "4.5.16", features = ["derive"] } fantoccini = "0.21.1" +fern = { version = "0.6.2", features = ["chrono", "colored"] } +humantime = "2.1.0" lazy_static = "1.5.0" log = "0.4.22" regex = "1.10.6" diff --git a/answers.json b/answers.json index 0af6332..79f51e1 100644 --- a/answers.json +++ b/answers.json @@ -1 +1 @@ -[{"upvotes":35051,"author":"unimplemented","content":"35051\n+2200\nYou are a victim of branch prediction fail.\nWhat is Branch Prediction?\nConsider a railroad junction:\nImage by Mecanismo, via Wikimedia Commons. Used under the CC-By-SA 3.0 license.\nNow for the sake of argument, suppose this is back in the 1800s - before long-distance or radio communication.\nYou are a blind operator of a junction and you hear a train coming. You have no idea which way it is supposed to go. You stop the train to ask the driver which direction they want. And then you set the switch appropriately.\nTrains are heavy and have a lot of inertia, so they take forever to start up and slow down.\nIs there a better way? 
You guess which direction the train will go!\nIf you guessed right, it continues on.\nIf you guessed wrong, the driver will stop, back up, and yell at you to flip the switch. Then it can restart down the other path.\nIf you guess right every time, the train will never have to stop.\nIf you guess wrong too often, the train will spend a lot of time stopping, backing up, and restarting.\nConsider an if-statement: At the processor level, it is a branch instruction:\nYou are a processor and you see a branch. You have no idea which way it will go. What do you do? You halt execution and wait until the previous instructions are complete. Then you continue down the correct path.\nModern processors are complicated and have long pipelines. This means they take forever to \"warm up\" and \"slow down\".\nIs there a better way? You guess which direction the branch will go!\nIf you guessed right, you continue executing.\nIf you guessed wrong, you need to flush the pipeline and roll back to the branch. Then you can restart down the other path.\nIf you guess right every time, the execution will never have to stop.\nIf you guess wrong too often, you spend a lot of time stalling, rolling back, and restarting.\nThis is branch prediction. I admit it's not the best analogy since the train could just signal the direction with a flag. But in computers, the processor doesn't know which direction a branch will go until the last moment.\nHow would you strategically guess to minimize the number of times that the train must back up and go down the other path? You look at the past history! If the train goes left 99% of the time, then you guess left. If it alternates, then you alternate your guesses. If it goes one way every three times, you guess the same...\nIn other words, you try to identify a pattern and follow it. This is more or less how branch predictors work.\nMost applications have well-behaved branches. Therefore, modern branch predictors will typically achieve >90% hit rates. But when faced with unpredictable branches with no recognizable patterns, branch predictors are virtually useless.\nFurther reading: \"Branch predictor\" article on Wikipedia.\nAs hinted from above, the culprit is this if-statement:\nif (data[c] >= 128)\n sum += data[c];\nNotice that the data is evenly distributed between 0 and 255. When the data is sorted, roughly the first half of the iterations will not enter the if-statement. After that, they will all enter the if-statement.\nThis is very friendly to the branch predictor since the branch consecutively goes the same direction many times. Even a simple saturating counter will correctly predict the branch except for the few iterations after it switches direction.\nQuick visualization:\nT = branch taken\nN = branch not taken\n\ndata[] = 0, 1, 2, 3, 4, ... 126, 127, 128, 129, 130, ... 250, 251, 252, ...\nbranch = N N N N N ... N N T T T ... T T T ...\n\n = NNNNNNNNNNNN ... NNNNNNNTTTTTTTTT ... TTTTTTTTTT (easy to predict)\nHowever, when the data is completely random, the branch predictor is rendered useless, because it can't predict random data. Thus there will probably be around 50% misprediction (no better than random guessing).\ndata[] = 226, 185, 125, 158, 198, 144, 217, 79, 202, 118, 14, 150, 177, 182, ...\nbranch = T, T, N, T, T, T, T, N, T, N, N, T, T, T ...\n\n = TTNTTTTNTNNTTT ... 
(completely random - impossible to predict)\nWhat can be done?\nIf the compiler isn't able to optimize the branch into a conditional move, you can try some hacks if you are willing to sacrifice readability for performance.\nReplace:\nif (data[c] >= 128)\n sum += data[c];\nwith:\nint t = (data[c] - 128) >> 31;\nsum += ~t & data[c];\nThis eliminates the branch and replaces it with some bitwise operations.\n(Note that this hack is not strictly equivalent to the original if-statement. But in this case, it's valid for all the input values of data[].)\nBenchmarks: Core i7 920 @ 3.5 GHz\nC++ - Visual Studio 2010 - x64 Release\nScenario Time (seconds)\nBranching - Random data 11.777\nBranching - Sorted data 2.352\nBranchless - Random data 2.564\nBranchless - Sorted data 2.587\nJava - NetBeans 7.1.1 JDK 7 - x64\nScenario Time (seconds)\nBranching - Random data 10.93293813\nBranching - Sorted data 5.643797077\nBranchless - Random data 3.113581453\nBranchless - Sorted data 3.186068823\nObservations:\nWith the Branch: There is a huge difference between the sorted and unsorted data.\nWith the Hack: There is no difference between sorted and unsorted data.\nIn the C++ case, the hack is actually a tad slower than with the branch when the data is sorted.\nA general rule of thumb is to avoid data-dependent branching in critical loops (such as in this example).\nUpdate:\nGCC 4.6.1 with -O3 or -ftree-vectorize on x64 is able to generate a conditional move, so there is no difference between the sorted and unsorted data - both are fast. This is called \"if-conversion\" (to branchless) and is necessary for vectorization but also sometimes good for scalar.\n(Or somewhat fast: for the already-sorted case, cmov can be slower especially if GCC puts it on the critical path instead of just add, especially on Intel before Broadwell where cmov has 2-cycle latency: gcc optimization flag -O3 makes code slower than -O2)\nVC++ 2010 is unable to generate conditional moves for this branch even under /Ox.\nIntel C++ Compiler (ICC) 11 does something miraculous. It interchanges the two loops, thereby hoisting the unpredictable branch to the outer loop. Not only is it immune to the mispredictions, it's also twice as fast as whatever VC++ and GCC can generate! In other words, ICC took advantage of the test-loop to defeat the benchmark...\nIf you give the Intel compiler the branchless code, it just outright vectorizes it... and is just as fast as with the branch (with the loop interchange).\nClang also vectorizes the if() version, as will GCC 5 and later with -O3, even though it takes quite a few instructions to sign-extend to the 64-bit sum on x86 without SSE4 or AVX2. (-march=x86-64-v2 or v3). See Why is processing an unsorted array the same speed as processing a sorted array with modern x86-64 clang?\nThis goes to show that even mature modern compilers can vary wildly in their ability to optimize code...\n"},{"upvotes":4710,"author":"unimplemented","content":"4710\nBranch prediction.\nWith a sorted array, the condition data[c] >= 128 is first false for a streak of values, then becomes true for all later values. That's easy to predict. 
With an unsorted array, you pay for the branching cost.\n"},{"upvotes":3829,"author":"unimplemented","content":"3829\n+150\nThe reason why performance improves drastically when the data is sorted is that the branch prediction penalty is removed, as explained beautifully in Mysticial's answer.\nNow, if we look at the code\nif (data[c] >= 128)\n sum += data[c];\nwe can find that the meaning of this particular if... else... branch is to add something when a condition is satisfied. This type of branch can be easily transformed into a conditional move statement, which would be compiled into a conditional move instruction: cmovl, in an x86 system. The branch and thus the potential branch prediction penalty is removed.\nIn C, thus C++, the statement, which would compile directly (without any optimization) into the conditional move instruction in x86, is the ternary operator ... ? ... : .... So we rewrite the above statement into an equivalent one:\nsum += data[c] >=128 ? data[c] : 0;\nWhile maintaining readability, we can check the speedup factor.\nOn an Intel Core i7-2600K @ 3.4 GHz and Visual Studio 2010 Release Mode, the benchmark is:\nx86\nScenario Time (seconds)\nBranching - Random data 8.885\nBranching - Sorted data 1.528\nBranchless - Random data 3.716\nBranchless - Sorted data 3.71\nx64\nScenario Time (seconds)\nBranching - Random data 11.302\nBranching - Sorted data 1.830\nBranchless - Random data 2.736\nBranchless - Sorted data 2.737\nThe result is robust in multiple tests. We get a great speedup when the branch result is unpredictable, but we suffer a little bit when it is predictable. In fact, when using a conditional move, the performance is the same regardless of the data pattern.\nNow let's look more closely by investigating the x86 assembly they generate. For simplicity, we use two functions max1 and max2.\nmax1 uses the conditional branch if... else ...:\nint max1(int a, int b) {\n if (a > b)\n return a;\n else\n return b;\n}\nmax2 uses the ternary operator ... ? ... : ...:\nint max2(int a, int b) {\n return a > b ? a : b;\n}\nOn an x86-64 machine, GCC -S generates the assembly below.\n:max1\n movl %edi, -4(%rbp)\n movl %esi, -8(%rbp)\n movl -4(%rbp), %eax\n cmpl -8(%rbp), %eax\n jle .L2\n movl -4(%rbp), %eax\n movl %eax, -12(%rbp)\n jmp .L4\n.L2:\n movl -8(%rbp), %eax\n movl %eax, -12(%rbp)\n.L4:\n movl -12(%rbp), %eax\n leave\n ret\n\n:max2\n movl %edi, -4(%rbp)\n movl %esi, -8(%rbp)\n movl -4(%rbp), %eax\n cmpl %eax, -8(%rbp)\n cmovge -8(%rbp), %eax\n leave\n ret\nmax2 uses much less code due to the usage of instruction cmovge. But the real gain is that max2 does not involve branch jumps, jmp, which would have a significant performance penalty if the predicted result is not right.\nSo why does a conditional move perform better?\nIn a typical x86 processor, the execution of an instruction is divided into several stages. Roughly, we have different hardware to deal with different stages. So we do not have to wait for one instruction to finish to start a new one. This is called pipelining.\nIn a branch case, the following instruction is determined by the preceding one, so we cannot do pipelining. We have to either wait or predict.\nIn a conditional move case, the execution of conditional move instruction is divided into several stages, but the earlier stages like Fetch and Decode do not depend on the result of the previous instruction; only the latter stages need the result. Thus, we wait a fraction of one instruction's execution time. 
This is why the conditional move version is slower than the branch when the prediction is easy.\nThe book Computer Systems: A Programmer's Perspective, second edition explains this in detail. You can check Section 3.6.6 for Conditional Move Instructions, entire Chapter 4 for Processor Architecture, and Section 5.11.2 for special treatment for Branch Prediction and Misprediction Penalties.\nSometimes, some modern compilers can optimize our code to assembly with better performance, and sometimes some compilers can't (the code in question is using Visual Studio's native compiler). Knowing the performance difference between a branch and a conditional move when unpredictable can help us write code with better performance when the scenario gets so complex that the compiler can not optimize them automatically.\n"},{"upvotes":2639,"author":"unimplemented","content":"2639\nIf you are curious about even more optimizations that can be done to this code, consider this:\nStarting with the original loop:\nfor (unsigned i = 0; i < 100000; ++i)\n{\n for (unsigned j = 0; j < arraySize; ++j)\n {\n if (data[j] >= 128)\n sum += data[j];\n }\n}\nWith loop interchange, we can safely change this loop to:\nfor (unsigned j = 0; j < arraySize; ++j)\n{\n for (unsigned i = 0; i < 100000; ++i)\n {\n if (data[j] >= 128)\n sum += data[j];\n }\n}\nThen, you can see that the if conditional is constant throughout the execution of the i loop, so you can hoist the if out:\n for (unsigned j = 0; j < arraySize; ++j)\n {\n if (data[j] >= 128)\n {\n for (unsigned i = 0; i < 100000; ++i)\n {\n sum += data[j];\n }\n }\n}\nThen, you see that the inner loop can be collapsed into one single expression, assuming the floating point model allows it (/fp:fast is thrown, for example)\nfor (unsigned j = 0; j < arraySize; ++j)\n{\n if (data[j] >= 128)\n {\n sum += data[j] * 100000;\n }\n}\nThat one is 100,000 times faster than before.\n"},{"upvotes":2186,"author":"unimplemented","content":"2186\nNo doubt some of us would be interested in ways of identifying code that is problematic for the CPU's branch-predictor. The Valgrind tool cachegrind has a branch-predictor simulator, enabled by using the --branch-sim=yes flag. Running it over the examples in this question, with the number of outer loops reduced to 10000 and compiled with g++, gives these results:\nSorted:\n==32551== Branches: 656,645,130 ( 656,609,208 cond + 35,922 ind)\n==32551== Mispredicts: 169,556 ( 169,095 cond + 461 ind)\n==32551== Mispred rate: 0.0% ( 0.0% + 1.2% )\nUnsorted:\n==32555== Branches: 655,996,082 ( 655,960,160 cond + 35,922 ind)\n==32555== Mispredicts: 164,073,152 ( 164,072,692 cond + 460 ind)\n==32555== Mispred rate: 25.0% ( 25.0% + 1.2% )\nDrilling down into the line-by-line output produced by cg_annotate we see for the loop in question:\nSorted:\n Bc Bcm Bi Bim\n 10,001 4 0 0 for (unsigned i = 0; i < 10000; ++i)\n . . . . {\n . . . . // primary loop\n 327,690,000 10,016 0 0 for (unsigned c = 0; c < arraySize; ++c)\n . . . . {\n 327,680,000 10,006 0 0 if (data[c] >= 128)\n 0 0 0 0 sum += data[c];\n . . . . }\n . . . . }\nUnsorted:\n Bc Bcm Bi Bim\n 10,001 4 0 0 for (unsigned i = 0; i < 10000; ++i)\n . . . . {\n . . . . // primary loop\n 327,690,000 10,038 0 0 for (unsigned c = 0; c < arraySize; ++c)\n . . . . {\n 327,680,000 164,050,007 0 0 if (data[c] >= 128)\n 0 0 0 0 sum += data[c];\n . . . . }\n . . . . 
}\nThis lets you easily identify the problematic line - in the unsorted version the if (data[c] >= 128) line is causing 164,050,007 mispredicted conditional branches (Bcm) under cachegrind's branch-predictor model, whereas it's only causing 10,006 in the sorted version.\nAlternatively, on Linux you can use the performance counters subsystem to accomplish the same task, but with native performance using CPU counters.\nperf stat ./sumtest_sorted\nSorted:\n Performance counter stats for './sumtest_sorted':\n\n 11808.095776 task-clock # 0.998 CPUs utilized \n 1,062 context-switches # 0.090 K/sec \n 14 CPU-migrations # 0.001 K/sec \n 337 page-faults # 0.029 K/sec \n26,487,882,764 cycles # 2.243 GHz \n41,025,654,322 instructions # 1.55 insns per cycle \n 6,558,871,379 branches # 555.455 M/sec \n 567,204 branch-misses # 0.01% of all branches \n\n 11.827228330 seconds time elapsed\nUnsorted:\n Performance counter stats for './sumtest_unsorted':\n\n 28877.954344 task-clock # 0.998 CPUs utilized \n 2,584 context-switches # 0.089 K/sec \n 18 CPU-migrations # 0.001 K/sec \n 335 page-faults # 0.012 K/sec \n65,076,127,595 cycles # 2.253 GHz \n41,032,528,741 instructions # 0.63 insns per cycle \n 6,560,579,013 branches # 227.183 M/sec \n 1,646,394,749 branch-misses # 25.10% of all branches \n\n 28.935500947 seconds time elapsed\nIt can also do source code annotation with disassembly.\nperf record -e branch-misses ./sumtest_unsorted\nperf annotate -d sumtest_unsorted\n Percent | Source code & Disassembly of sumtest_unsorted\n------------------------------------------------\n...\n : sum += data[c];\n 0.00 : 400a1a: mov -0x14(%rbp),%eax\n 39.97 : 400a1d: mov %eax,%eax\n 5.31 : 400a1f: mov -0x20040(%rbp,%rax,4),%eax\n 4.60 : 400a26: cltq \n 0.00 : 400a28: add %rax,-0x30(%rbp)\n...\nSee the performance tutorial for more details.\n"},{"upvotes":1612,"author":"unimplemented","content":"1612\nI just read up on this question and its answers, and I feel an answer is missing.\nA common way to eliminate branch prediction that I've found to work particularly well in managed languages is a table lookup instead of using a branch (although I haven't tested it in this case).\nThis approach works in general if:\nit's a small table and is likely to be cached in the processor, and\nyou are running things in quite a tight loop and/or the processor can preload the data.\nBackground and why\nFrom a processor perspective, your memory is slow. To compensate for the difference in speed, a couple of caches are built into your processor (L1/L2 cache). So imagine that you're doing your nice calculations and figure out that you need a piece of memory. The processor will issue its 'load' operation and load the piece of memory into cache -- and then use the cache to do the rest of the calculations. Because memory is relatively slow, this 'load' will slow down your program.\nLike branch prediction, this was optimized in the Pentium processors: the processor predicts that it needs to load a piece of data and attempts to load that into the cache before the operation actually hits the cache. 
As we've already seen, branch prediction sometimes goes horribly wrong -- in the worst case scenario you need to go back and actually wait for a memory load, which will take forever (in other words: failing branch prediction is bad, a memory load after a branch prediction fail is just horrible!).\nFortunately for us, if the memory access pattern is predictable, the processor will load it in its fast cache and all is well.\nThe first thing we need to know is: what counts as small? While smaller is generally better, a rule of thumb is to stick to lookup tables that are <= 4096 bytes in size. As an upper limit: if your lookup table is larger than 64K it's probably worth reconsidering.\nConstructing a table\nSo we've figured out that we can create a small table. The next thing to do is get a lookup function in place. Lookup functions are usually small functions that use a couple of basic integer operations (and, or, xor, shift, add, subtract and perhaps multiply). You want to have your input translated by the lookup function to some kind of 'unique key' in your table, which then simply gives you the answer for all the work you wanted it to do.\nIn this case: >= 128 means we can keep the value, < 128 means we get rid of it. The easiest way to do that is by using an 'AND': if we keep it, we AND it with 7FFFFFFF; if we want to get rid of it, we AND it with 0. Notice also that 128 is a power of 2 -- so we can go ahead and make a table of 32768/128 integers and fill it with one zero and a lot of 7FFFFFFF's.\nManaged languages\nYou might wonder why this works well in managed languages. After all, managed languages check the boundaries of the arrays with a branch to ensure you don't mess up...\nWell, not exactly... :-)\nThere has been quite some work on eliminating this branch for managed languages. For example:\nfor (int i = 0; i < array.Length; ++i)\n{\n // Use array[i]\n}\nIn this case, it's obvious to the compiler that the boundary condition will never be hit. At least the Microsoft JIT compiler (but I expect Java does similar things) will notice this and remove the check altogether. WOW, that means no branch. Similarly, it will deal with other obvious cases.\nIf you run into trouble with lookups in managed languages -- the key is to add a & 0x[something]FFF to your lookup function to make the boundary check predictable -- and watch it going faster.\nThe result of this case\n// Generate data\nint arraySize = 32768;\nint[] data = new int[arraySize];\n\nRandom random = new Random(0);\nfor (int c = 0; c < arraySize; ++c)\n{\n data[c] = random.Next(256);\n}\n\n/*To keep the spirit of the code intact, I'll make a separate lookup table\n(I assume we cannot modify 'data' or the number of loops)*/\n\nint[] lookup = new int[256];\n\nfor (int c = 0; c < 256; ++c)\n{\n lookup[c] = (c >= 128) ? c : 0;\n}\n\n// Test\nDateTime startTime = System.DateTime.Now;\nlong sum = 0;\n\nfor (int i = 0; i < 100000; ++i)\n{\n // Primary loop\n for (int j = 0; j < arraySize; ++j)\n {\n /* Here you basically want to use simple operations - so no\n random branches, but things like &, |, *, -, +, etc. are fine. 
*/\n sum += lookup[data[j]];\n }\n}\n\nDateTime endTime = System.DateTime.Now;\nConsole.WriteLine(endTime - startTime);\nConsole.WriteLine(\"sum = \" + sum);\nConsole.ReadLine();\n"},{"upvotes":1435,"author":"unimplemented","content":"1435\nAs data is distributed between 0 and 255 when the array is sorted, around the first half of the iterations will not enter the if-statement (the if statement is shown below).\nif (data[c] >= 128)\n sum += data[c];\nThe question is: what makes the above statement not execute in certain cases, as in the case of sorted data? Here comes the \"branch predictor\". A branch predictor is a digital circuit that tries to guess which way a branch (e.g. an if-then-else structure) will go before this is known for sure. The purpose of the branch predictor is to improve the flow in the instruction pipeline. Branch predictors play a critical role in achieving high effective performance!\nLet's do some benchmarking to understand it better\nThe performance of an if-statement depends on whether its condition has a predictable pattern. If the condition is always true or always false, the branch prediction logic in the processor will pick up the pattern. On the other hand, if the pattern is unpredictable, the if-statement will be much more expensive.\nLet’s measure the performance of this loop with different conditions:\nfor (int i = 0; i < max; i++)\n if (condition)\n sum++;\nHere are the timings of the loop with different true-false patterns:\nCondition Pattern Time (ms)\n-------------------------------------------------------\n(i & 0x80000000) == 0 T repeated 322\n\n(i & 0xffffffff) == 0 F repeated 276\n\n(i & 1) == 0 TF alternating 760\n\n(i & 3) == 0 TFFFTFFF… 513\n\n(i & 2) == 0 TTFFTTFF… 1675\n\n(i & 4) == 0 TTTTFFFFTTTTFFFF… 1275\n\n(i & 8) == 0 8T 8F 8T 8F … 752\n\n(i & 16) == 0 16T 16F 16T 16F … 490\nA “bad” true-false pattern can make an if-statement up to six times slower than a “good” pattern! Of course, which pattern is good and which is bad depends on the exact instructions generated by the compiler and on the specific processor.\nSo there is no doubt about the impact of branch prediction on performance!\n"},{"upvotes":1370,"author":"unimplemented","content":"1370\nOne way to avoid branch prediction errors is to build a lookup table, and index it using the data. Stefan de Bruijn discussed that in his answer.\nBut in this case, we know values are in the range [0, 255] and we only care about values >= 128. That means we can easily extract a single bit that will tell us whether we want a value or not: by shifting the data to the right 7 bits, we are left with a 0 bit or a 1 bit, and we only want to add the value when we have a 1 bit. Let's call this bit the \"decision bit\".\nBy using the 0/1 value of the decision bit as an index into an array, we can make code that will be equally fast whether the data is sorted or not sorted. Our code will always add a value, but when the decision bit is 0, we will add the value somewhere we don't care about. Here's the code:\n// Test\nclock_t start = clock();\nlong long a[] = {0, 0};\nlong long sum;\n\nfor (unsigned i = 0; i < 100000; ++i)\n{\n // Primary loop\n for (unsigned c = 0; c < arraySize; ++c)\n {\n int j = (data[c] >> 7);\n a[j] += data[c];\n }\n}\n\ndouble elapsedTime = static_cast<double>(clock() - start) / CLOCKS_PER_SEC;\nsum = a[1];\nThis code wastes half of the adds but never has a branch prediction failure. 
It's tremendously faster on random data than the version with an actual if statement.\nBut in my testing, an explicit lookup table was slightly faster than this, probably because indexing into a lookup table was slightly faster than bit shifting. This shows how my code sets up and uses the lookup table (unimaginatively called lut for \"LookUp Table\" in the code). Here's the C++ code:\n// Declare and then fill in the lookup table\nint lut[256];\nfor (unsigned c = 0; c < 256; ++c)\n lut[c] = (c >= 128) ? c : 0;\n\n// Use the lookup table after it is built\nfor (unsigned i = 0; i < 100000; ++i)\n{\n // Primary loop\n for (unsigned c = 0; c < arraySize; ++c)\n {\n sum += lut[data[c]];\n }\n}\nIn this case, the lookup table was only 256 entries, so it fits nicely in a cache and all was fast. This technique wouldn't work well if the data was 24-bit values and we only wanted half of them... the lookup table would be far too big to be practical. On the other hand, we can combine the two techniques shown above: first shift the bits over, then index a lookup table. For a 24-bit value of which we only want the top half, we could potentially shift the data right by 12 bits, and be left with a 12-bit value for a table index. A 12-bit table index implies a table of 4096 values, which might be practical.\nThe technique of indexing into an array, instead of using an if statement, can be used for deciding which pointer to use. I saw a library that implemented binary trees, and instead of having two named pointers (pLeft and pRight or whatever) had a length-2 array of pointers and used the \"decision bit\" technique to decide which one to follow. For example, instead of:\nif (x < node->value)\n node = node->pLeft;\nelse\n node = node->pRight;\nthis library would do something like:\ni = (x < node->value);\nnode = node->link[i];\nHere's a link to this code: Red Black Trees, Eternally Confuzzled\n"},{"upvotes":1230,"author":"unimplemented","content":"1230\nIn the sorted case, you can do better than relying on successful branch prediction or any branchless comparison trick: completely remove the branch.\nIndeed, the array is partitioned in a contiguous zone with data < 128 and another with data >= 128. So you should find the partition point with a dichotomic search (using Lg(arraySize) = 15 comparisons), then do a straight accumulation from that point.\nSomething like (unchecked)\nint i= 0, j, k= arraySize;\nwhile (i < k)\n{\n j= (i + k) >> 1;\n if (data[j] >= 128)\n k= j;\n else\n i= j;\n}\nsum= 0;\nfor (; i < arraySize; i++)\n sum+= data[i];\nor, slightly more obfuscated\nint i, j, k;\nfor (i= 0, k= arraySize; i < k; (data[j] >= 128 ? k : i)= j)\n j= (i + k) >> 1;\nfor (sum= 0; i < arraySize; i++)\n sum+= data[i];\nA yet faster approach, that gives an approximate solution for both sorted and unsorted data, is: sum= 3137536; (assuming a truly uniform distribution, 16384 samples with expected value 191.5) :-)\n"},{"upvotes":1034,"author":"unimplemented","content":"1034\nThe above behavior is happening because of branch prediction.\nTo understand branch prediction one must first understand an instruction pipeline.\nThe steps of running an instruction can be overlapped with the sequence of steps of running the previous and next instruction, so that different steps can be executed concurrently in parallel. This technique is known as instruction pipelining and is used to increase throughput in modern processors. 
To understand this better, please see this example on Wikipedia.\nGenerally, modern processors have quite long (and wide) pipelines, so many instructions can be in flight. See Modern Microprocessors A 90-Minute Guide! which starts by introducing basic in-order pipelining and goes from there.\nBut for simplicity let's consider a simple in-order pipeline with these 4 steps only.\n(Like a classic 5-stage RISC, but omitting a separate MEM stage.)\nIF -- Fetch the instruction from memory\nID -- Decode the instruction\nEX -- Execute the instruction\nWB -- Write back to CPU register\n4-stage pipeline in general for 2 instructions.\nMoving back to the above question, let's consider the following instructions:\n A) if (data[c] >= 128)\n /\\\n / \\\n / \\\n true / \\ false\n / \\\n / \\\n / \\\n / \\\n B) sum += data[c]; C) for loop or print().\nWithout branch prediction, the following would occur:\nTo execute instruction B or instruction C the processor will have to wait (stall) until instruction A leaves the EX stage in the pipeline, as the decision to go to instruction B or instruction C depends on the result of instruction A. (i.e. where to fetch from next.) So the pipeline will look like this:\nWithout prediction: when if condition is true:\nWithout prediction: when if condition is false:\nAs a result of waiting for the result of instruction A, the total CPU cycles spent in the above case (without branch prediction; for both true and false) is 7.\nSo what is branch prediction?\nThe branch predictor will try to guess which way a branch (an if-then-else structure) will go before this is known for sure. It will not wait for instruction A to reach the EX stage of the pipeline; instead it will guess the decision and go to that instruction (B or C in case of our example).\nIn case of a correct guess, the pipeline looks something like this:\nIf it is later detected that the guess was wrong then the partially executed instructions are discarded and the pipeline starts over with the correct branch, incurring a delay. The time that is wasted in case of a branch misprediction is equal to the number of stages in the pipeline from the fetch stage to the execute stage. Modern microprocessors tend to have quite long pipelines so that the misprediction delay is between 10 and 20 clock cycles. The longer the pipeline the greater the need for a good branch predictor.\nIn the OP's code, the first time the conditional is encountered, the branch predictor does not have any information to base a prediction on, so the first time it will randomly choose the next instruction. (Or fall back to static prediction, typically forward not-taken, backward taken). Later in the for loop, it can base the prediction on the history. For an array sorted in ascending order, there are three possibilities:\nAll the elements are less than 128\nAll the elements are greater than 128\nSome starting elements are less than 128 and the later ones become greater than 128\nLet us assume that the predictor will always assume the true branch on the first run.\nSo in the first case, it will always take the true branch since historically all its predictions are correct. In the 2nd case, initially it will predict wrong, but after a few iterations, it will predict correctly. In the 3rd case, it will initially predict correctly till the elements are less than 128. 
After which it will fail for some time and then correct itself when it sees the branch prediction failing in its history.\nIn all these cases the failures will be few in number, and as a result, only a few times will it need to discard the partially executed instructions and start over with the correct branch, resulting in fewer CPU cycles.\nBut in case of a random unsorted array, the prediction will fail most of the time; the pipeline will need to discard the partially executed instructions and start over with the correct branch, resulting in more CPU cycles compared to the sorted array.\nFurther reading:\nModern Microprocessors A 90-Minute Guide!\nDan Luu's article on branch prediction (which covers older branch predictors, not modern IT-TAGE or Perceptron)\nhttps://en.wikipedia.org/wiki/Branch_predictor\nBranch Prediction and the Performance of Interpreters - Don’t Trust Folklore - 2015 paper showing how well Intel's Haswell does at predicting the indirect branch of a Python interpreter's main loop (historically problematic due to a non-simple pattern), vs. earlier CPUs which didn't use IT-TAGE. (They don't help with this fully random case, though. Still 50% mispredict rate for the if inside the loop on a Skylake CPU when the source is compiled to branch asm.)\nStatic branch prediction on newer Intel processors - what CPUs actually do when running a branch instruction that doesn't have a dynamic prediction available. Historically, forward not-taken (like an if or break), backward taken (like a loop) has been used because it's better than nothing. Laying out code so the fast path / common case minimizes taken branches is good for I-cache density as well as static prediction, so compilers already do that. (That's the real effect of likely / unlikely hints in C source, not actually hinting the hardware branch prediction in most CPUs, except maybe via static prediction.)\n"},{"upvotes":907,"author":"unimplemented","content":"907\nAn official answer would be from\nIntel - Avoiding the Cost of Branch Misprediction\nIntel - Branch and Loop Reorganization to Prevent Mispredicts\nScientific papers - branch prediction computer architecture\nBooks: J.L. Hennessy, D.A. Patterson: Computer architecture: a quantitative approach\nArticles in scientific publications: T.Y. Yeh, Y.N. Patt made a lot of these on branch prediction.\nYou can also see from this lovely diagram why the branch predictor gets confused.\nEach element in the original code is a random value\ndata[c] = std::rand() % 256;\nso the predictor will change sides with each new std::rand() value.\nOn the other hand, once it's sorted, the predictor will first move into a state of strongly not taken, and when the values change to the high values the predictor will, in three runs through, change all the way from strongly not taken to strongly taken.\n"},{"upvotes":868,"author":"unimplemented","content":"868\n+100\nAlong the same lines (I think this was not highlighted by any answer) it's good to mention that sometimes (especially in software where performance matters, like in the Linux kernel) you can find some if statements like the following:\nif (likely( everything_is_ok ))\n{\n /* Do something */\n}\nor similarly:\nif (unlikely(very_improbable_condition))\n{\n /* Do something */ \n}\nBoth likely() and unlikely() are in fact macros that are defined by using something like GCC's __builtin_expect to help the compiler insert prediction code to favour the condition, taking into account the information provided by the user. 
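\nAs an illustration, here is a minimal sketch of how such macros are commonly defined (these mirror the Linux kernel's definitions; the double negation !!(x) normalizes any non-zero value to exactly 1, so the comparison against the expected constant works for arbitrary truthy expressions):\n/* branch hint: x is expected to be non-zero (true) almost always */\n#define likely(x) __builtin_expect(!!(x), 1)\n/* branch hint: x is expected to be zero (false) almost always */\n#define unlikely(x) __builtin_expect(!!(x), 0)\nNote that the wrapped condition still evaluates normally; __builtin_expect only tells the compiler which outcome to treat as the common, fall-through path when laying out the code.\n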
GCC supports other builtins that could change the behavior of the running program or emit low-level instructions like clearing the cache, etc. See this documentation that goes through the available GCC builtins.\nNormally this kind of optimization is mainly found in hard real-time applications or embedded systems where execution time matters and is critical. For example, if you are checking for some error condition that only happens 1/10000000 times, then why not inform the compiler about this? This way, by default, the branch prediction would assume that the condition is false.\n"},{"upvotes":842,"author":"unimplemented","content":"842\n+25\nFrequently used Boolean operations in C++ produce many branches in the compiled program. If these branches are inside loops and are hard to predict they can slow down execution significantly. Boolean variables are stored as 8-bit integers with the value 0 for false and 1 for true.\nBoolean variables are overdetermined in the sense that all operators that have Boolean variables as input check if the inputs have any other value than 0 or 1, but operators that have Booleans as output can produce no other value than 0 or 1. This makes operations with Boolean variables as input less efficient than necessary. Consider this example:\nbool a, b, c, d;\nc = a && b;\nd = a || b;\nThis is typically implemented by the compiler in the following way:\nbool a, b, c, d;\nif (a != 0) {\n if (b != 0) {\n c = 1;\n }\n else {\n goto CFALSE;\n }\n}\nelse {\n CFALSE:\n c = 0;\n}\nif (a == 0) {\n if (b == 0) {\n d = 0;\n }\n else {\n goto DTRUE;\n }\n}\nelse {\n DTRUE:\n d = 1;\n}\nThis code is far from optimal. The branches may take a long time in case of mispredictions. The Boolean operations can be made much more efficient if it is known with certainty that the operands have no other values than 0 and 1. The reason why the compiler does not make such an assumption is that the variables might have other values if they are uninitialized or come from unknown sources. The above code can be optimized if a and b have been initialized to valid values or if they come from operators that produce Boolean output. The optimized code looks like this:\nchar a = 0, b = 1, c, d;\nc = a & b;\nd = a | b;\nchar is used instead of bool in order to make it possible to use the bitwise operators (& and |) instead of the Boolean operators (&& and ||). The bitwise operators are single instructions that take only one clock cycle. The OR operator (|) works even if a and b have other values than 0 or 1. The AND operator (&) and the EXCLUSIVE OR operator (^) may give inconsistent results if the operands have other values than 0 and 1.\n~ cannot be used for NOT. Instead, you can make a Boolean NOT on a variable which is known to be 0 or 1 by XOR'ing it with 1:\nbool a, b;\nb = !a;\ncan be optimized to:\nchar a = 0, b;\nb = a ^ 1;\na && b cannot be replaced with a & b if b is an expression that should not be evaluated if a is false ( && will not evaluate b, & will). 
Likewise, a || b cannot be replaced with a | b if b is an expression that should not be evaluated if a is true.\nUsing bitwise operators is more advantageous if the operands are variables than if the operands are comparisons:\nbool a; double x, y, z;\na = x > y && z < 5.0;\nis optimal in most cases (unless you expect the && expression to generate many branch mispredictions).\n"},{"upvotes":504,"author":"unimplemented","content":"504\nThat's for sure!...\nBranch misprediction makes the logic run slower, because of the switching which happens in your code! It's like you are going down a straight street or a street with a lot of turnings; for sure the straight one is going to be done quicker!...\nIf the array is sorted, your condition data[c] >= 128 is false at the first steps, then becomes true for the whole way to the end of the street. That's how you get to the end of the logic faster. On the other hand, using an unsorted array, you need a lot of turning and processing which makes your code run slower for sure...\nLook at the image I created for you below. Which street is going to be finished faster?\nSo programmatically, branch misprediction causes the process to be slower...\nAlso at the end, it's good to know we have two kinds of branch prediction, each of which is going to affect your code differently:\n1. Static\n2. Dynamic\nSee also this document from Intel, which says:\nStatic branch prediction is used by the microprocessor the first time a conditional branch is encountered, and dynamic branch prediction is used for succeeding executions of the conditional branch code.\nIn order to effectively write your code to take advantage of these rules, when writing if-else or switch statements, check the most common cases first and work progressively down to the least common. Loops do not necessarily require any special ordering of code for static branch prediction, as only the condition of the loop iterator is normally used.\n"},{"upvotes":454,"author":"unimplemented","content":"454\nThis question has already been answered excellently many times over. Still I'd like to draw the group's attention to yet another interesting analysis.\nRecently this example (modified very slightly) was also used as a way to demonstrate how a piece of code can be profiled within the program itself on Windows. Along the way, the author also shows how to use the results to determine where the code is spending most of its time in both the sorted & unsorted case. Finally the piece also shows how to use a little known feature of the HAL (Hardware Abstraction Layer) to determine just how much branch misprediction is happening in the unsorted case.\nThe link is here: A Demonstration of Self-Profiling\n"},{"upvotes":413,"author":"unimplemented","content":"413\nAs others have already mentioned, what's behind the mystery is the branch predictor.\nI'm not trying to add anything new, just to explain the concept in another way. There is a concise introduction on the wiki which contains text and diagrams. I do like the explanation below, which uses a diagram to elaborate the branch predictor intuitively.\nIn computer architecture, a branch predictor is a digital circuit that tries to guess which way a branch (e.g. an if-then-else structure) will go before this is known for sure. The purpose of the branch predictor is to improve the flow in the instruction pipeline. 
Branch predictors play a critical role in achieving high effective performance in many modern pipelined microprocessor architectures such as x86.\nTwo-way branching is usually implemented with a conditional jump instruction. A conditional jump can either be \"not taken\" and continue execution with the first branch of code which follows immediately after the conditional jump, or it can be \"taken\" and jump to a different place in program memory where the second branch of code is stored. It is not known for certain whether a conditional jump will be taken or not taken until the condition has been calculated and the conditional jump has passed the execution stage in the instruction pipeline (see fig. 1).\nBased on the described scenario, I have written an animation demo to show how instructions are executed in a pipeline in different situations.\nWithout the branch predictor.\nWithout branch prediction, the processor would have to wait until the conditional jump instruction has passed the execute stage before the next instruction can enter the fetch stage in the pipeline.\nThe example contains three instructions and the first one is a conditional jump instruction. The latter two instructions cannot go into the pipeline until the conditional jump instruction is executed.\nIt will take 9 clock cycles for 3 instructions to be completed.\nUse the branch predictor and don't take the conditional jump. Let's assume that the prediction is not to take the conditional jump.\nIt will take 7 clock cycles for 3 instructions to be completed.\nUse the branch predictor and take the conditional jump. Let's assume again that the prediction is not to take the conditional jump, so this is a misprediction.\nIt will take 9 clock cycles for 3 instructions to be completed.\nThe time that is wasted in case of a branch misprediction is equal to the number of stages in the pipeline from the fetch stage to the execute stage. Modern microprocessors tend to have quite long pipelines so that the misprediction delay is between 10 and 20 clock cycles. As a result, making a pipeline longer increases the need for a more advanced branch predictor.\nAs you can see, it seems we don't have a reason not to use a branch predictor.\nIt's quite a simple demo that clarifies the very basic part of the branch predictor. If those gifs are annoying, please feel free to remove them from the answer and visitors can also get the live demo source code from BranchPredictorDemo\n"},{"upvotes":317,"author":"unimplemented","content":"317\nBranch-prediction gain!\nIt is important to understand that branch misprediction doesn't slow down programs. The cost of a missed prediction is just as if branch prediction didn't exist and you waited for the evaluation of the expression to decide what code to run (further explanation in the next paragraph).\nif (expression)\n{\n // Run 1\n} else {\n // Run 2\n}\nWhenever there's an if-else / switch statement, the expression has to be evaluated to determine which block should be executed. In the assembly code generated by the compiler, conditional branch instructions are inserted.\nA branch instruction can cause a computer to begin executing a different instruction sequence and thus deviate from its default behavior of executing instructions in order (i.e. if the expression is false, the program skips the code of the if block) depending on some condition, which is the expression evaluation in our case.\nThat being said, the processor tries to predict the outcome prior to it being actually evaluated. 
It will fetch instructions from the if block, and if the expression turns out to be true, then wonderful! We gained the time it took to evaluate it and made progress in the code; if not, then we are running the wrong code, the pipeline is flushed, and the correct block is run.\nVisualization:\nLet's say you need to pick route 1 or route 2. You could stop at ## and wait for your partner to check the map, or you could just pick route 1 and, if you were lucky (route 1 is the correct route), then great: you didn't have to wait for your partner to check the map (you saved the time it would have taken him to check the map); otherwise you will just turn back.\nWhile flushing pipelines is super fast nowadays, taking this gamble is worth it. Predicting sorted data or data that changes slowly is always easier and better than predicting fast changes.\n O Route 1 /-------------------------------\n/|\\ /\n | ---------##/\n/ \\ \\\n \\\n Route 2 \\--------------------------------\n"},{"upvotes":277,"author":"unimplemented","content":"277\nOn ARM, there is no branch needed, because every instruction has a 4-bit condition field, which tests (at zero cost) any of 16 different conditions that may arise in the Processor Status Register, and if the condition on an instruction is false, the instruction is skipped. This eliminates the need for short branches, and there would be no branch prediction hit for this algorithm. Therefore, the sorted version of this algorithm would run slower than the unsorted version on ARM, because of the extra overhead of sorting.\nThe inner loop for this algorithm would look something like the following in ARM assembly language:\nMOV R0, #0 // R0 = sum = 0\nMOV R1, #0 // R1 = c = 0\nADR R2, data // R2 = addr of data array (put this instruction outside outer loop)\n.inner_loop // Inner loop branch label\n LDRB R3, [R2, R1] // R3 = data[c]\n CMP R3, #128 // compare R3 to 128\n ADDGE R0, R0, R3 // if R3 >= 128, then sum += data[c] -- no branch needed!\n ADD R1, R1, #1 // c++\n CMP R1, #arraySize // compare c to arraySize\n BLT inner_loop // Branch to inner_loop if c < arraySize\nBut this is actually part of a bigger picture:\nCMP opcodes always update the status bits in the Processor Status Register (PSR), because that is their purpose, but most other instructions do not touch the PSR unless you add an optional S suffix to the instruction, specifying that the PSR should be updated based on the result of the instruction. 
Just like the 4-bit condition suffix, being able to execute instructions without affecting the PSR is a mechanism that reduces the need for branches on ARM, and also facilitates out-of-order dispatch at the hardware level, because after performing some operation X that updates the status bits, subsequently (or in parallel) you can do a bunch of other work that explicitly should not affect (or be affected by) the status bits, then you can test the state of the status bits set earlier by X.\nThe condition testing field and the optional \"set status bit\" field can be combined, for example:\nADD R1, R2, R3 performs R1 = R2 + R3 without updating any status bits.\nADDGE R1, R2, R3 performs the same operation only if a previous instruction that affected the status bits resulted in a Greater than or Equal condition.\nADDS R1, R2, R3 performs the addition and then updates the N, Z, C and V flags in the Processor Status Register based on whether the result was Negative, Zero, Carried (for unsigned addition), or oVerflowed (for signed addition).\nADDSGE R1, R2, R3 performs the addition only if the GE test is true, and then subsequently updates the status bits based on the result of the addition.\nMost processor architectures do not have this ability to specify whether or not the status bits should be updated for a given operation, which can necessitate writing additional code to save and later restore status bits, or may require additional branches, or may limit the processor's out-of-order execution efficiency: one of the side effects of most CPU instruction set architectures forcibly updating status bits after most instructions is that it is much harder to tease apart which instructions can be run in parallel without interfering with each other. Updating status bits has side effects, and therefore has a linearizing effect on code. ARM's ability to mix and match branch-free condition testing on any instruction with the option to either update or not update the status bits after any instruction is extremely powerful, for both assembly language programmers and compilers, and produces very efficient code.\nWhen you don't have to branch, you can avoid the time cost of flushing the pipeline for what would otherwise be short branches, and you can avoid the design complexity of many forms of speculative evaluation. The performance impact of the initial naive implementations of the mitigations for many recently discovered processor vulnerabilities (Spectre etc.) shows you just how much the performance of modern processors depends upon complex speculative evaluation logic. With a short pipeline and the dramatically reduced need for branching, ARM just doesn't need to rely on speculative evaluation as much as CISC processors. (Of course high-end ARM implementations do include speculative evaluation, but it's a smaller part of the performance story.)\nIf you have ever wondered why ARM has been so phenomenally successful, the brilliant effectiveness and interplay of these two mechanisms (combined with another mechanism that lets you \"barrel shift\" left or right one of the two arguments of any arithmetic operator or offset memory access operator at zero additional cost) are a big part of the story, because they are some of the greatest sources of the ARM architecture's efficiency. 
The brilliance of the original designers of the ARM ISA back in 1983, Steve Furber and Roger (now Sophie) Wilson, cannot be overstated.\n"},{"upvotes":229,"author":"unimplemented","content":"229\nBesides the fact that branch misprediction may slow you down, a sorted array has another advantage:\nYou can have a stop condition instead of just checking the value; this way you only loop over the relevant data, and ignore the rest.\nThe branch prediction will miss only once.\n // sort backwards (higher values first), may be in some other part of the code\n std::sort(data, data + arraySize, std::greater<int>());\n\n for (unsigned c = 0; c < arraySize; ++c) {\n if (data[c] < 128) {\n break;\n }\n sum += data[c]; \n }\n"},{"upvotes":214,"author":"unimplemented","content":"214\nSorted arrays are processed faster than unsorted arrays, due to a phenomenon called branch prediction.\nThe branch predictor is a digital circuit (in computer architecture) trying to predict which way a branch will go, improving the flow in the instruction pipeline. The circuit/computer predicts the next step and executes it.\nMaking a wrong prediction leads to going back to the previous step, and executing with another prediction. Assuming the prediction is correct, the code will continue to the next step. A wrong prediction results in repeating the same step, until a correct prediction occurs.\nThe answer to your question is very simple.\nIn an unsorted array, the computer makes multiple predictions, leading to an increased chance of errors. Whereas in a sorted array, the computer makes fewer predictions, reducing the chance of errors. Making more predictions requires more time.\nSorted Array: Straight Road\n____________________________________________________________________________________\n- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\nTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTTT\nUnsorted Array: Curved Road\n______ ________\n| |__|\nBranch prediction: Guessing/predicting which road is straight and following it without checking\n___________________________________________ Straight road\n |_________________________________________|Longer road\nAlthough both the roads reach the same destination, the straight road is shorter, and the other is longer. If you then choose the other by mistake, there is no turning back, and so you will waste some extra time if you choose the longer road. This is similar to what happens in the computer, and I hope this helped you understand better.\nAlso I want to cite @Simon_Weaver from the comments:\nIt doesn’t make fewer predictions - it makes fewer incorrect predictions. 
It still has to predict for each time through the loop...\n"},{"upvotes":191,"author":"unimplemented","content":"191\nI tried the same code with MATLAB 2011b with my MacBook Pro (Intel i7, 64 bit, 2.4 GHz) for the following MATLAB code:\n% Processing time with Sorted data vs unsorted data\n%==========================================================================\n% Generate data\narraySize = 32768\nsum = 0;\n% Generate random integer data in the range 1 to 256\ndata = randi(256, arraySize, 1);\n\n\n%Sort the data\ndata1= sort(data); % data1= data when no sorting done\n\n\n%Start a stopwatch timer to measure the execution time\ntic;\n\nfor i=1:100000\n\n for j=1:arraySize\n\n if data1(j)>=128\n sum=sum + data1(j);\n end\n end\nend\n\ntoc;\n\nExeTimeWithSorting = toc; % elapsed seconds since the last tic\nThe results for the above MATLAB code are as follows:\n a: Elapsed time (without sorting) = 3479.880861 seconds.\n b: Elapsed time (with sorting ) = 2377.873098 seconds.\nWith the C code from @GManNickG's answer I get:\n a: Elapsed time (without sorting) = 19.8761 sec.\n b: Elapsed time (with sorting ) = 7.37778 sec.\nBased on this, it looks like MATLAB is almost 175 times slower than the C implementation without sorting and 350 times slower with sorting. In other words, the effect (of branch prediction) is 1.46x for the MATLAB implementation and 2.7x for the C implementation.\n"},{"upvotes":123,"author":"unimplemented","content":"123\nThe assumption by other answers that one needs to sort the data is not correct.\nThe following code does not sort the entire array, but only 200-element segments of it, and thereby runs the fastest.\nSorting only k-element sections completes the pre-processing in linear time, O(n), rather than the O(n log n) time needed to sort the entire array.\n#include <algorithm>\n#include <cstdlib>\n#include <ctime>\n#include <iostream>\n\nint main() {\n int data[32768]; const int l = sizeof data / sizeof data[0];\n\n for (unsigned c = 0; c < l; ++c)\n data[c] = std::rand() % 256;\n\n // sort 200-element segments, not the whole array\n for (unsigned c = 0; c + 200 <= l; c += 200)\n std::sort(&data[c], &data[c + 200]);\n\n clock_t start = clock();\n long long sum = 0;\n\n for (unsigned i = 0; i < 100000; ++i) {\n for (unsigned c = 0; c < sizeof data / sizeof(int); ++c) {\n if (data[c] >= 128)\n sum += data[c];\n }\n }\n\n std::cout << static_cast<double>(clock() - start) / CLOCKS_PER_SEC << std::endl;\n std::cout << \"sum = \" << sum << std::endl;\n}\nThis also \"proves\" that it has nothing to do with any algorithmic issue such as sort order, and it is indeed branch prediction.\n"},{"upvotes":114,"author":"unimplemented","content":"114\nBjarne Stroustrup's Answer to this question:\nThat sounds like an interview question. Is it true? How would you know? It is a bad idea to answer questions about efficiency without first doing some measurements, so it is important to know how to measure.\nSo, I tried with a vector of a million integers and got:\nAlready sorted 32995 milliseconds\nShuffled 125944 milliseconds\n\nAlready sorted 18610 milliseconds\nShuffled 133304 milliseconds\n\nAlready sorted 17942 milliseconds\nShuffled 107858 milliseconds\nI ran that a few times to be sure. Yes, the phenomenon is real. 
My key code was:\nvoid run(vector<int>& v, const string& label)\n{\n auto t0 = system_clock::now();\n sort(v.begin(), v.end());\n auto t1 = system_clock::now();\n cout << label\n << duration_cast<milliseconds>(t1 - t0).count()\n << \" milliseconds\n\";\n}\n\nvoid tst()\n{\n vector<int> v(1'000'000);\n iota(v.begin(), v.end(), 0);\n run(v, \"already sorted \");\n std::shuffle(v.begin(), v.end(), std::mt19937{ std::random_device{}() });\n run(v, \"shuffled \");\n}\nAt least the phenomenon is real with this compiler, standard library, and optimizer settings. Different implementations can and do give different answers. In fact, someone did do a more systematic study (a quick web search will find it) and most implementations show that effect.\nOne reason is branch prediction: the key operation in the sort algorithm is “if (v[i] < pivot) …” or equivalent. For a sorted sequence that test is always true whereas, for a random sequence, the branch chosen varies randomly.\nAnother reason is that when the vector is already sorted, we never need to move elements to their correct position. The effect of these little details is the factor of five or six that we saw.\nQuicksort (and sorting in general) is a complex study that has attracted some of the greatest minds of computer science. A good sort function is a result of both choosing a good algorithm and paying attention to hardware performance in its implementation.\nIf you want to write efficient code, you need to know a bit about machine architecture.\n"},{"upvotes":105,"author":"unimplemented","content":"105\nThis question is rooted in branch prediction models on CPUs. I'd recommend reading this paper:\nIncreasing the Instruction Fetch Rate via Multiple Branch Prediction and a Branch Address Cache (But real CPUs these days still don't make multiple taken branch-predictions per clock cycle, except for Haswell and later effectively unrolling tiny loops in their loop buffers. Modern CPUs can predict multiple branches not-taken to make use of their fetches in large contiguous blocks.)\nWhen you have sorted elements, branch prediction easily predicts correctly except right at the boundary, letting instructions flow through the CPU pipeline efficiently, without having to rewind and take the correct path on mispredictions.\n"},{"upvotes":33,"author":"unimplemented","content":"33\nAn answer for quick and simple understanding (read the others for more details)\nThis concept is called branch prediction\nBranch prediction is an optimization technique that predicts the path the code will take before it is known with certainty. This is important because during the code execution, the machine prefetches several code statements and stores them in the pipeline.\nThe problem arises in conditional branching, where there are two possible paths or parts of the code that can be executed.\nWhen the prediction is true, the optimization technique works out.\nWhen the prediction is false, to explain it in a simple way, the code statements stored in the pipeline are proved wrong and the actual code has to be completely reloaded, which takes up a lot of time.\nAs common sense suggests, predictions of something sorted are way more accurate than predictions of something unsorted.\nbranch prediction visualisation:\nsorted\nunsorted\n"},{"upvotes":29235,"author":"unimplemented","content":"29235\n+500\nUndo a commit & redo\n$ git commit -m \"Something terribly misguided\" # (0: Your Accident)\n$ git reset HEAD~ # (1)\n[ edit files as necessary ] # (2)\n$ git add . 
# (3)\n$ git commit -c ORIG_HEAD # (4)\ngit reset is the command responsible for the undo. It will undo your last commit while leaving your working tree (the state of your files on disk) untouched. You'll need to add them again before you can commit them again.\nMake corrections to working tree files.\ngit add anything that you want to include in your new commit.\nCommit the changes, reusing the old commit message. reset copied the old head to .git/ORIG_HEAD; commit with -c ORIG_HEAD will open an editor, which initially contains the log message from the old commit and allows you to edit it. If you do not need to edit the message, you could use the -C option.\nAlternatively, to edit the previous commit (or just its commit message), commit --amend will add changes within the current index to the previous commit.\nTo remove (not revert) a commit that has been pushed to the server, rewriting history with git push origin main --force[-with-lease] is necessary. It's almost always a bad idea to use --force; prefer --force-with-lease instead, and as noted in the git manual:\nYou should understand the implications of rewriting history if you amend a commit that has already been published.\nFurther Reading\nYou can use git reflog to determine the SHA-1 for the commit to which you wish to revert. Once you have this value, use the sequence of commands as explained above.\nHEAD~ is the same as HEAD~1. The article What is the HEAD in git? is helpful if you want to uncommit multiple commits.\n"},{"upvotes":12799,"author":"unimplemented","content":"12799\nUndoing a commit is a little scary if you don't know how it works. But it's actually amazingly easy if you do understand. I'll show you the 4 different ways you can undo a commit.\nSay you have this, where C is your HEAD and (F) is the state of your files.\n (F)\nA-B-C\n ↑\n master\nOption 1: git reset --hard\nYou want to destroy commit C and also throw away any uncommitted changes. You do this:\ngit reset --hard HEAD~1\nThe result is:\n (F)\nA-B\n ↑\nmaster\nNow B is the HEAD. Because you used --hard, your files are reset to their state at commit B.\nOption 2: git reset\nMaybe commit C wasn't a disaster, but just a bit off. You want to undo the commit but keep your changes for a bit of editing before you do a better commit. Starting again from here, with C as your HEAD:\n (F)\nA-B-C\n ↑\n master\nDo this, leaving off the --hard:\ngit reset HEAD~1\nIn this case the result is:\n (F)\nA-B-C\n ↑\nmaster\nIn both cases, HEAD is just a pointer to the latest commit. When you do a git reset HEAD~1, you tell Git to move the HEAD pointer back one commit. But (unless you use --hard) you leave your files as they were. So now git status shows the changes you had checked into C. You haven't lost a thing!\nOption 3: git reset --soft\nFor the lightest touch, you can even undo your commit but leave your files and your index:\ngit reset --soft HEAD~1\nThis not only leaves your files alone, it even leaves your index alone. When you do git status, you'll see that the same files are in the index as before. In fact, right after this command, you could do git commit and you'd be redoing the same commit you just had.\nOption 4: you did git reset --hard and need to get that code back\nOne more thing: Suppose you destroy a commit as in the first example, but then discover you needed it after all? Tough luck, right?\nNope, there's still a way to get it back. Type this\ngit reflog\nand you'll see a list of (partial) commit SHAs (that is, hashes) that you've moved around in. 
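For illustration, the reflog output looks something like this (the hashes and messages here are made up):\ne5b19e4 HEAD@{0}: reset: moving to HEAD~1\n2c52489 HEAD@{1}: commit: some work\n8f3a6d2 HEAD@{2}: commit: earlier work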
Find the commit you destroyed, and do this:\ngit checkout -b someNewBranchName shaYouDestroyed\nYou've now resurrected that commit. Commits don't actually get destroyed in Git for some 90 days, so you can usually go back and rescue one you didn't mean to get rid of.\n"},{"upvotes":2745,"author":"unimplemented","content":"2745\nThere are two ways to \"undo\" your last commit, depending on whether or not you have already made your commit public (pushed to your remote repository):\nHow to undo a local commit\nLet's say I committed locally, but now I want to remove that commit.\ngit log\n commit 101: bad commit # Latest commit. This would be called 'HEAD'.\n commit 100: good commit # Second to last commit. This is the one we want.\nTo restore everything back to the way it was prior to the last commit, we need to reset to the commit before HEAD:\ngit reset --soft HEAD^ # Use --soft if you want to keep your changes\ngit reset --hard HEAD^ # Use --hard if you don't care about keeping the changes you made\nNow git log will show that our last commit has been removed.\nHow to undo a public commit\nIf you have already made your commits public, you will want to create a new commit which will \"revert\" the changes you made in your previous commit (current HEAD).\ngit revert HEAD\nYour changes will now be reverted and ready for you to commit:\ngit commit -m 'restoring the file I removed by accident'\ngit log\n commit 102: restoring the file I removed by accident\n commit 101: removing a file we don't need\n commit 100: adding a file that we need\nFor more information, check out Git Basics - Undoing Things.\n"},{"upvotes":1950,"author":"unimplemented","content":"1950\nAdd/remove files to get things the way you want:\ngit rm classdir\ngit add sourcedir\nThen amend the commit:\ngit commit --amend\nThe previous, erroneous commit will be edited to reflect the new index state - in other words, it'll be like you never made the mistake in the first place.\nNote that you should only do this if you haven't pushed yet. 
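A quick way to check whether a commit has been pushed is git status: on a branch that tracks a remote (origin/master is assumed here for illustration), an unpushed commit shows up as something like:\nYour branch is ahead of 'origin/master' by 1 commit.\n (use \"git push\" to publish your local commits)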
If you have pushed, then you'll just have to commit a fix normally.\n"},{"upvotes":1295,"author":"unimplemented","content":"1295\nThis will add a new commit which deletes the added files.\ngit rm yourfiles/*.class\ngit commit -a -m \"deleted all class files in folder 'yourfiles'\"\nOr you can rewrite history to undo the last commit.\nWarning: this command will permanently remove the modifications to the .java files (and any other files) that you committed -- and delete all your changes from your working directory:\ngit reset --hard HEAD~1\nThe hard reset to HEAD~1 will set your working copy to the state of the commit before your wrong commit.\n"},{"upvotes":960,"author":"unimplemented","content":"960\nTo change the last commit\nReplace the files in the index:\ngit rm --cached *.class\ngit add *.java\nThen, if it's a private branch, amend the commit:\ngit commit --amend\nOr, if it's a shared branch, make a new commit:\ngit commit -m 'Replace .class files with .java files'\n\n(To change a previous commit, use the awesome interactive rebase.)\nProTip™: Add *.class to a gitignore to stop this happening again.\nTo revert a commit\nAmending a commit is the ideal solution if you need to change the last commit, but a more general solution is reset.\nYou can reset Git to any commit with:\ngit reset @~N\nWhere N is the number of commits before HEAD, and @~ resets to the previous commit.\nInstead of amending the commit, you could use:\ngit reset @~\ngit add *.java\ngit commit -m \"Add .java files\"\nCheck out git help reset, specifically the sections on --soft --mixed and --hard, for a better understanding of what this does.\nReflog\nIf you mess up, you can always use the reflog to find dropped commits:\n$ git reset @~\n$ git reflog\nc4f708b HEAD@{0}: reset: moving to @~\n2c52489 HEAD@{1}: commit: added some .class files\n$ git reset 2c52489\n... and you're back where you started\n\n"},{"upvotes":846,"author":"unimplemented","content":"846\nUse git revert <commit-id>.\nTo get the commit ID, just use git log.\n"},{"upvotes":700,"author":"unimplemented","content":"700\nIf you are planning to undo a local commit entirely, whatever changes you made in the commit, and if you don't worry about any of that, just do the following command.\ngit reset --hard HEAD^1\n(This command will discard your entire commit and your changes will be lost completely from your local working tree.) If you want to undo your commit, but you want your changes in the staging area (before commit, just like after git add) then do the following command.\ngit reset --soft HEAD^1\nNow your committed files come into the staging area. Suppose you want to unstage the files, because you need to edit some wrong content; then do the following command\ngit reset HEAD\nNow the committed files come from the staged area into the unstaged area. Now the files are ready to edit, so whatever you want to change, go edit it, add it, and make a fresh/new commit.\nMore (link broken) (Archived version)\n"},{"upvotes":626,"author":"unimplemented","content":"626\nIf you have Git Extras installed, you can run git undo to undo the latest commit. git undo 3 will undo the last three commits.\n"},{"upvotes":587,"author":"unimplemented","content":"587\nI wanted to undo the latest five commits in our shared repository. I looked up the revision id that I wanted to roll back to. 
Then I typed in the following.\nprompt> git reset --hard 5a7404742c85\nHEAD is now at 5a74047 Added one more page to catalogue\nprompt> git push origin master --force\nTotal 0 (delta 0), reused 0 (delta 0)\nremote: bb/acl: neoneye is allowed. accepted payload.\nTo git@bitbucket.org:thecompany/prometheus.git\n + 09a6480...5a74047 master -> master (forced update)\nprompt>\n"},{"upvotes":548,"author":"unimplemented","content":"548\nI prefer to use git rebase -i for this job, because a nice list pops up where I can choose the commits to get rid of. It might not be as direct as some other answers here, but it just feels right.\nChoose how many commits you want to list, then invoke like this (to list the last three)\ngit rebase -i HEAD~3\nSample list\npick aa28ba7 Sanity check for RtmpSrv port\npick c26c541 RtmpSrv version option\npick 58d6909 Better URL decoding support\nGit will then drop the commits for any line that you remove from the list.\n"},{"upvotes":511,"author":"unimplemented","content":"511\nHow to fix the previous local commit\nUse git-gui (or similar) to perform a git commit --amend. From the GUI you can add or remove individual files from the commit. You can also modify the commit message.\nHow to undo the previous local commit\nJust reset your branch to the previous location (for example, using gitk or git rebase). Then reapply your changes from a saved copy. After garbage collection in your local repository, it will be like the unwanted commit never happened. To do all of that in a single command, use git reset HEAD~1.\nWord of warning: Careless use of git reset is a good way to get your working copy into a confusing state. I recommend that Git novices avoid this if they can.\nHow to undo a public commit\nPerform a reverse cherry pick (git-revert) to undo the changes.\nIf you haven't yet pulled other changes onto your branch, you can simply do...\ngit revert --no-edit HEAD\nThen push your updated branch to the shared repository.\nThe commit history will show both commits, separately.\nAdvanced: Correction of the private branch in public repository\nThis can be dangerous -- be sure you have a local copy of the branch to repush.\nAlso note: You don't want to do this if someone else may be working on the branch.\ngit push --delete (branch_name) ## remove public version of branch\nClean up your branch locally then repush...\ngit push origin (branch_name)\nIn the normal case, you probably needn't worry about your private-branch commit history being pristine. Just push a followup commit (see 'How to undo a public commit' above), and later, do a squash-merge to hide the history.\n"},{"upvotes":423,"author":"unimplemented","content":"423\nIf you want to permanently undo it and you have cloned some repository, the commit id can be seen by:\ngit log\nThen you can do:\ngit reset --hard <commit_id>\n\ngit push origin -f\n"},{"upvotes":415,"author":"unimplemented","content":"415\nIf you have committed junk but not pushed,\ngit reset --soft HEAD~1\nHEAD~1 is a shorthand for the commit before head. Alternatively you can refer to the SHA-1 of the commit you want to reset to. The --soft option will delete the commit, but it will leave all your changed files as \"Changes to be committed\", as git status would put it.\nIf you want to get rid of any changes to tracked files in the working tree since the commit before head, use \"--hard\" instead.\nOR\nIf you already pushed and someone pulled, which is usually my case, you can't use git reset. 
You can however do a git revert,\ngit revert HEAD\nThis will create a new commit that reverses everything introduced by the accidental commit.\n"},{"upvotes":357,"author":"unimplemented","content":"357\nOn SourceTree (a GUI for Git), you may right-click the commit and do a 'Reverse Commit'. This should undo your changes.\nOn the terminal:\nYou may alternatively use:\ngit revert <commit>\nOr:\ngit reset --soft HEAD^ # Use --soft if you want to keep your changes.\ngit reset --hard HEAD^ # Use --hard if you don't care about keeping your changes.\n"},{"upvotes":328,"author":"unimplemented","content":"328\nA single command:\ngit reset --soft 'HEAD^' \nIt works great to undo the last local commit!\n"},{"upvotes":309,"author":"unimplemented","content":"309\nJust reset it using the command below in git:\ngit reset --soft HEAD~1\nExplanation: what git reset does is basically reset to any commit you'd like to go back to; if you combine it with the --soft flag, it will go back but keep the changes in your file(s), so you get back to the stage where the files were just added. HEAD is the head of the branch, and if you combine it with ~1 (in this case you can also use HEAD^), it will go back only one commit, which is what you want...\nI've drawn the steps in the image below in more detail, including all the steps that may happen in real situations when committing code:\n"},{"upvotes":299,"author":"unimplemented","content":"299\n\"Reset the working tree to the last commit\"\ngit reset --hard HEAD^ \n\"Clean unknown files from the working tree\"\ngit clean \nsee - Git Quick Reference\nNOTE: This command will delete your previous commit, so use with caution! git reset --soft is safer.\n"},{"upvotes":280,"author":"unimplemented","content":"280\nHow to undo the last Git commit?\nTo restore everything back to the way it was prior to the last commit, we need to reset to the commit before HEAD.\nIf you don't want to keep the changes that you made:\ngit reset --hard HEAD^\nIf you want to keep your changes:\ngit reset --soft HEAD^\nNow check your git log. It will show that our last commit has been removed.\n"},{"upvotes":229,"author":"unimplemented","content":"229\nUse reflog to find a correct state\ngit reflog\n(reflog before reset)\nSelect the correct reflog entry (f3cb6e2 in my case) and type\ngit reset --hard f3cb6e2\nAfter that the repo HEAD will be reset to that commit id (log after reset)\nFinally the reflog looks like the picture below (reflog final)\n"},{"upvotes":201,"author":"unimplemented","content":"201\nFirst run:\ngit reflog\nIt will show you all the possible actions you have performed on your repository, for example, commit, merge, pull, etc.\nThen do:\ngit reset --hard ActionIdFromRefLog\n"},{"upvotes":186,"author":"unimplemented","content":"186\nUndo last commit:\ngit reset --soft HEAD^ or git reset --soft HEAD~\nThis will undo the last commit.\nHere --soft means the changes are kept in staging.\nHEAD~ or HEAD^ means to move to the commit before HEAD.\nReplace the last commit with a new commit:\ngit commit --amend -m \"message\"\nIt will replace the last commit with the new commit.\n"},{"upvotes":184,"author":"unimplemented","content":"184\nAnother way:\nCheck out the branch you want to revert, then reset your local working copy back to the commit that you want to be the latest one on the remote server (everything after it will go bye-bye). 
To do this, in SourceTree I right-clicked on the commit and selected \"Reset BRANCHNAME to this commit\".\nThen navigate to your repository's local directory and run this command:\ngit -c diff.mnemonicprefix=false -c core.quotepath=false push -v -f --tags REPOSITORY_NAME BRANCHNAME:BRANCHNAME\nThis will erase all commits after the current one in your local repository but only for that one branch.\n"},{"upvotes":172,"author":"unimplemented","content":"172\nType git log and find the last commit hash code and then enter:\ngit reset <commit-hash>\n"},{"upvotes":168,"author":"unimplemented","content":"168\nIn my case I accidentally committed some files I did not want to. So I did the following and it worked:\ngit reset --soft HEAD^\ngit rm --cached [files you do not need]\ngit add [files you need]\ngit commit -c ORIG_HEAD\nVerify the results with gitk or git log --stat\n"},{"upvotes":166,"author":"unimplemented","content":"166\nWHAT TO USE, reset --soft or reset --hard?\nI am just adding my two cents to @Kyralessa's answer:\nIf you are unsure what to use, go for --soft (I use this convention to remember it: --soft for safe).\nWhy?\nIf you choose --hard by mistake you will LOSE your changes. If you choose --soft by mistake you can achieve the same results as --hard by applying additional commands\ngit reset HEAD file.html\ngit checkout -- file.html\nFull example\necho \"some changes...\" > file.html\ngit add file.html\ngit commit -m \"wrong commit\"\n\n# I need to reset\ngit reset --hard HEAD~1 (cancel changes)\n# OR\ngit reset --soft HEAD~1 # Back to staging\ngit reset HEAD file.html # back to working directory\ngit checkout -- file.html # cancel changes\nCredit goes to @Kyralessa.\n"},{"upvotes":162,"author":"unimplemented","content":"162\nSimple, run this in your command line:\ngit reset --soft HEAD~ \n"},{"upvotes":155,"author":"unimplemented","content":"155\nThere are many ways to do it:\nGit command to undo the last commit/ previous commits:\nWarning: Do Not use --hard if you do not know what you are doing. --hard is too dangerous, and it might delete your files.\nBasic command to revert the commit in Git is:\n$ git reset --hard <COMMIT-ID>\nor\n$ git reset --hard HEAD~<n>\nCOMMIT-ID: ID of the commit\nn: the number of last commits you want to revert\nYou can get the commit id as shown below:\n$ git log --oneline\n\nd81d3f1 function to subtract two numbers\n\nbe20eb8 function to add two numbers\n\nbedgfgg function to multiply two numbers\nwhere d81d3f1 and be20eb8 are commit ids.\nNow, let's see some cases:\nSuppose you want to revert the last commit 'd81d3f1'. Here are two options:\n$ git reset --hard d81d3f1\nor\n$ git reset --hard HEAD~1\nSuppose you want to revert the commit 'be20eb8':\n$ git reset --hard be20eb8\nFor more detailed information, you can refer to and try out some other commands too for resetting the head to a specified state:\n$ git reset --help\n"},{"upvotes":151,"author":"unimplemented","content":"151\nFor a local commit\ngit reset --soft HEAD~1\nor if you do not remember exactly in which commit it is, you might use\ngit rm --cached <file>\nFor a pushed commit\nThe proper way of removing files from the repository history is using git filter-branch. That is,\ngit filter-branch --index-filter 'git rm --cached <file>' HEAD\nBut I recommend you use this command with care. 
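For example, a hypothetical run that purges a file named secret.txt from the current branch's history (the file name is an assumption for illustration; --ignore-unmatch keeps git rm from failing on commits that do not contain the file):\ngit filter-branch --index-filter 'git rm --cached --ignore-unmatch secret.txt' HEAD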
Read more at the git-filter-branch(1) Manual Page.\n"},{"upvotes":149,"author":"unimplemented","content":"149\nThere are two main scenarios\nYou haven't pushed the commit yet\nIf the problem was extra files you committed (and you don't want those in the repository), you can remove them using git rm and then committing with --amend\ngit rm <filename>\nYou can also remove entire directories with -r, or even combine with other Bash commands\ngit rm -r <dirname>\ngit rm $(find -name '*.class')\nAfter removing the files, you can commit, with --amend option\ngit commit --amend -C HEAD # the -C option is to use the same commit message\nThis will rewrite your recent local commit removing the extra files, so, these files will never be sent on push and also will be removed from your local .git repository by GC.\nYou already pushed the commit\nYou can apply the same solution of the other scenario and then do git push with the -f option, but it is not recommended since it overwrites the remote history with a divergent change (it can mess up your repository).\nInstead, you have to do the commit without --amend (remember this about --amend: that option rewrites the history of the last commit).\n"},{"upvotes":26056,"author":"unimplemented","content":"26056\nExecutive Summary\ngit push -d <remote_name> <branchname> # Delete remote\ngit branch -d <branchname> # Delete local\nNote: In most cases, <remote_name> will be origin.\nDelete Local Branch\nTo delete the local branch, use one of the following:\ngit branch -d <branchname>\ngit branch -D <branchname>\nThe -d option is an alias for --delete, which only deletes the branch if it has already been fully merged in its upstream branch.\nThe -D option is an alias for --delete --force, which deletes the branch \"irrespective of its merged status.\" [Source: man git-branch]\nAs of Git v2.3, git branch -d (delete) learned to honor the -f (force) flag.\nYou will receive an error if you try to delete the currently selected branch.\nDelete Remote Branch\nAs of Git v1.7.0, you can delete a remote branch using\n$ git push <remote_name> --delete <branch_name>\nwhich might be easier to remember than\n$ git push <remote_name> :<branch_name>\nwhich was added in Git v1.5.0 \"to delete a remote branch or a tag.\"\nStarting with Git v2.8.0, you can also use git push with the -d option as an alias for --delete. Therefore, the version of Git you have installed will dictate whether you need to use the easier or harder syntax.\nDelete Remote Branch [Original Answer from 5-Jan-2010]\nFrom Chapter 3 of Pro Git by Scott Chacon:\nDeleting Remote Branches\nSuppose you’re done with a remote branch — say, you and your collaborators are finished with a feature and have merged it into your remote’s main branch (or whatever branch your stable code-line is in). You can delete a remote branch using the rather obtuse syntax git push [remotename] :[branch]. If you want to delete your serverfix branch from the server, you run the following:\n$ git push origin :serverfix\nTo git@github.com:schacon/simplegit.git\n - [deleted] serverfix\nBoom. No more branches on your server. You may want to dog-ear this page, because you’ll need that command, and you’ll likely forget the syntax. A way to remember this command is by recalling the git push [remotename] [localbranch]:[remotebranch] syntax that we went over a bit earlier. If you leave off the [localbranch] portion, then you’re basically saying, “Take nothing on my side and make it be [remotebranch].”\nI ran git push origin :bugfix, and it worked beautifully. 
Scott Chacon was right: I will want to dog-ear that page (or virtually dog-ear it by answering this on Stack Overflow).\nFinally, execute the following on other machines to propagate changes:\n# Fetch changes from all remotes and locally delete \n# remote deleted branches/tags etc\n# --prune will do the job :-;\ngit fetch --all --prune\n"},{"upvotes":3766,"author":"unimplemented","content":"3766\nMatthew’s answer is great for removing remote branches and I also appreciate the explanation, but to make a simple distinction between the two commands:\nto remove a local branch from your machine: git branch -d {local_branch} (use -D instead to force deleting the branch without checking merged status);\nto remove a remote branch from the server: git push origin -d {remote_branch}.\nReference: Git: Delete a branch (local or remote).\n"},{"upvotes":2729,"author":"unimplemented","content":"2729\nThe short answers\nIf you want more detailed explanations of the following commands, then see the long answers in the next section.\nDeleting a remote branch\ngit push origin --delete <branch> # Git version 1.7.0 or newer\ngit push origin -d <branch> # Shorter version (Git 1.7.0 or newer)\ngit push origin :<branch> # Git versions older than 1.7.0\nDeleting a local branch\ngit branch --delete <branch>\ngit branch -d <branch> # Shorter version\ngit branch -D <branch> # Force-delete un-merged branches\nDeleting a local remote-tracking branch\ngit branch --delete --remotes <remote>/<branch>\ngit branch -dr <remote>/<branch> # Shorter\n\ngit fetch --prune # Delete multiple obsolete remote-tracking branches\ngit fetch -p # Shorter\nThe long answer: there are three different branches to delete!\nWhen you're dealing with deleting branches both locally and remotely, keep in mind that there are three different branches involved:\nThe local branch X.\nThe remote origin branch X.\nThe local remote-tracking branch origin/X that tracks the remote branch X.\nThe original poster used:\ngit branch -rd origin/bugfix\nWhich only deleted his local remote-tracking branch origin/bugfix, and not the actual remote branch bugfix on origin.\nTo delete that actual remote branch, you need\ngit push origin --delete bugfix\nAdditional details\nThe following sections describe additional details to consider when deleting your remote and remote-tracking branches.\nPushing to delete remote branches also removes remote-tracking branches\nNote that deleting the remote branch X from the command line using a git push will also remove the local remote-tracking branch origin/X, so it is not necessary to prune the obsolete remote-tracking branch with git fetch --prune or git fetch -p. However, it wouldn't hurt if you did it anyway.\nYou can verify that the remote-tracking branch origin/X was also deleted by running the following:\n# View just remote-tracking branches\ngit branch --remotes\ngit branch -r\n\n# View both strictly local as well as remote-tracking branches\ngit branch --all\ngit branch -a\nPruning the obsolete local remote-tracking branch origin/X\nIf you didn't delete your remote branch X from the command line (like above), then your local repository will still contain (a now obsolete) remote-tracking branch origin/X. This can happen if you deleted a remote branch directly through GitHub's web interface, for example.\nA typical way to remove these obsolete remote-tracking branches (since Git version 1.6.6) is to simply run git fetch with the --prune or shorter -p. 
Note that this removes all obsolete local remote-tracking branches for any remote branches that no longer exist on the remote:\ngit fetch origin --prune\ngit fetch origin -p # Shorter\nHere is the relevant quote from the 1.6.6 release notes (emphasis mine):\n\"git fetch\" learned --all and --multiple options, to run fetch from many repositories, and --prune option to remove remote tracking branches that went stale. These make \"git remote update\" and \"git remote prune\" less necessary (there is no plan to remove \"remote update\" nor \"remote prune\", though).\nAlternative to above automatic pruning for obsolete remote-tracking branches\nAlternatively, instead of pruning your obsolete local remote-tracking branches through git fetch -p, you can avoid making the extra network operation by just manually removing the branch(es) with the --remotes or -r flags:\ngit branch --delete --remotes origin/X\ngit branch -dr origin/X # Shorter\nSee Also\ngit-branch(1) Manual Page.\ngit-fetch(1) Manual Page.\nPro Git § 3.5 Git Branching - Remote Branches.\n"},{"upvotes":1726,"author":"unimplemented","content":"1726\nSteps for deleting a branch:\nFor deleting the remote branch:\ngit push origin --delete <branch_name>\nFor deleting the local branch, you have three ways:\n1: git branch -D <branch_name>\n\n2: git branch --delete --force <branch_name> # Same as -D\n\n3: git branch --delete <branch_name> # Error on unmerged\nExplanation: OK, just to explain what's going on here!\nSimply do git push origin --delete <branch_name> to delete your remote branch only; adding the name of the branch at the end will delete it and push the deletion to the remote at the same time...\nAlso, git branch -D <branch_name> simply deletes the local branch only!...\n-D stands for --delete --force, which will delete the branch even if it's not merged (force delete), but you can also use -d, which stands for --delete and throws an error if the branch has not been merged...\nI also created the image below to show the steps:\n"},{"upvotes":924,"author":"unimplemented","content":"924\n+300\nYou can also use the following to delete the remote branch\ngit push --delete origin serverfix\nWhich does the same thing as\ngit push origin :serverfix\nbut it may be easier to remember.\n"},{"upvotes":611,"author":"unimplemented","content":"611\nIt's very simple:\nTo delete the remote branch\ngit push -d origin <branch_name>\nOr\ngit push origin :<branch_name>\n-- You can also delete tags with this syntax\nTo forcefully delete a local branch\ngit branch -D <branch_name>\nNote: do a git fetch --all --prune on other machines after deleting the remote branch, to remove obsolete tracking branches.\nExample\nto remove a local branch\ngit branch -D my-local-branch\nto remove a remote branch\ngit push origin :my-remote-branch\nWith the new version of git, it's also possible to remove a branch with\ngit push origin --delete <branch_name>\nTIP: if you want to see all available branches you can use git branch -a,\nand to see just remote branches, you can use git branch -r\n"},{"upvotes":418,"author":"unimplemented","content":"418\nTip: When you delete branches using\ngit branch -d <branch_name> # Deletes local branch\nor\ngit push origin :<branch_name> # Deletes remote branch\nonly the references are deleted. Even though the branch is actually removed on the remote, the references to it still exist in the local repositories of your team members. 
This means that for other team members the deleted branches are still visible when they do a git branch -a.\nTo solve this, your team members can prune the deleted branches with\ngit remote prune <remote>\nThis is typically git remote prune origin.\n"},{"upvotes":394,"author":"unimplemented","content":"394\nIf you want to delete a branch, first check out a branch other than the branch to be deleted.\ngit checkout other_than_branch_to_be_deleted\nDeleting the local branch:\ngit branch -D branch_to_be_deleted\nDeleting the remote branch:\ngit push origin --delete branch_to_be_deleted\n"},{"upvotes":291,"author":"unimplemented","content":"291\ngit branch -D <branch>\ngit branch -D -r origin/<branch>\ngit push origin :<branch>\n"},{"upvotes":251,"author":"unimplemented","content":"251\nThis is simple: Just run the following command:\nTo delete a Git branch both locally and remotely, first delete the local branch using this command:\ngit branch -d example\n(Here example is the branch name.)\nAnd after that, delete the remote branch using this command:\ngit push origin :example\n"},{"upvotes":230,"author":"unimplemented","content":"230\nAnother approach is:\ngit push --prune origin\nWARNING: This will delete all remote branches that do not exist locally. Or more comprehensively,\ngit push --mirror\nwill effectively make the remote repository look like the local copy of the repository (local heads, remotes and tags are mirrored on remote).\n"},{"upvotes":191,"author":"unimplemented","content":"191\nI use the following in my Bash settings:\nalias git-shoot=\"git push origin --delete\"\nThen you can call:\ngit-shoot branchname\n"},{"upvotes":160,"author":"unimplemented","content":"160\nDelete locally:\nTo delete a local branch, you can use:\ngit branch -d <branch_name>\nTo delete a branch forcibly, use -D instead of -d.\ngit branch -D <branch_name>\nDelete remotely:\nThere are two options:\ngit push origin :branchname\n\ngit push origin --delete branchname\nI would suggest you use the second way as it is more intuitive.\n"},{"upvotes":155,"author":"unimplemented","content":"155\nIf you want to complete both these steps with a single command, you can make an alias for it by adding the below to your ~/.gitconfig:\n[alias]\n rmbranch = \"!f(){ git branch -d ${1} && git push origin --delete ${1}; };f\"\nAlternatively, you can add this to your global configuration from the command line using\ngit config --global alias.rmbranch \\\n'!f(){ git branch -d ${1} && git push origin --delete ${1}; };f'\nNOTE: If using -d (lowercase d), the branch will only be deleted if it has been merged. To force the delete to happen, you will need to use -D (uppercase D).\n"},{"upvotes":150,"author":"unimplemented","content":"150\nSince January 2013, GitHub has included a Delete branch button next to each branch in your \"Branches\" page.\nRelevant blog post: Create and delete branches\n"},{"upvotes":131,"author":"unimplemented","content":"131\nTo delete your branch locally and remotely\nCheck out the master branch - git checkout master\nDelete your remote branch - git push origin --delete <branch_name>\nDelete your local branch - git branch --delete <branch_name>\n"},{"upvotes":127,"author":"unimplemented","content":"127\nYou can also do this using git remote prune origin\n$ git remote prune origin\nPruning origin\nURL: git@example.com/yourrepo.git\n * [pruned] origin/some-branch\nIt prunes and deletes remote-tracking branches from a git branch -r listing.\n"},{"upvotes":124,"author":"unimplemented","content":"124\nIn addition to the other answers, I often use the git_remote_branch tool. 
It's an extra install, but it gets you a convenient way to interact with remote branches. In this case, to delete:\ngrb delete branch\nI find that I also use the publish and track commands quite often.\n"},{"upvotes":118,"author":"unimplemented","content":"118\nA one-liner command to delete both local and remote:\nD=branch-name; git branch -D $D; git push origin :$D\nOr add the alias below to your ~/.gitconfig. Usage: git kill branch-name\n[alias]\n kill = \"!f(){ git branch -D \\\"$1\\\"; git push origin --delete \\\"$1\\\"; };f\"\n"},{"upvotes":106,"author":"unimplemented","content":"106\nDeleting Branches\nLet's assume our work on branch \"contact-form\" is done and we've already integrated it into \"master\". Since we don't need it anymore, we can delete it (locally):\n$ git branch -d contact-form\nAnd for deleting the remote branch:\ngit push origin --delete contact-form\n"},{"upvotes":99,"author":"unimplemented","content":"99\nTo delete locally - (normal)\ngit branch -d my_branch\nIf your branch is in rebasing/merging progress and that was not finished properly, you will get an error, Rebase/Merge in progress, and in that case, you won't be able to delete your branch.\nSo either you need to finish the rebasing/merging, or you can do a force delete by using\ngit branch -D my_branch\nTo delete in remote:\ngit push --delete origin my_branch\nYou can do the same using:\ngit push origin :my_branch # Easy to remember: both will do the same.\nGraphical representation:\n"},{"upvotes":97,"author":"unimplemented","content":"97\nDelete remote branch\ngit push origin :<branch_name>\nDelete local branch\ngit branch -D <branch_name>\nDelete local branch steps:\ncheck out another branch\ndelete the local branch\n"},{"upvotes":95,"author":"unimplemented","content":"95\nSimply say:\ngit branch -d <branch_name>\ngit push origin :<branch_name>\n"},{"upvotes":93,"author":"unimplemented","content":"93\nNow you can do it with the GitHub Desktop application.\nAfter launching the application\nClick on the project containing the branch\nSwitch to the branch you would like to delete\nFrom the \"Branch\" menu, select \"Unpublish...\" to have the branch deleted from the GitHub servers.\nFrom the \"Branch\" menu, select 'Delete \"branch_name\"...' to have the branch deleted off of your local machine (AKA the machine you are currently working on)\n"},{"upvotes":91,"author":"unimplemented","content":"91\ngit push origin --delete <branch_name>\nis easier to remember than\ngit push origin :branchName\n"},{"upvotes":82,"author":"unimplemented","content":"82\nThis won't work if you have a tag with the same name as the branch on the remote:\n$ git push origin :branch-or-tag-name\nerror: dst refspec branch-or-tag-name matches more than one.\nerror: failed to push some refs to 'git@github.com:SomeName/some-repo.git'\nIn that case you need to specify that you want to delete the branch, not the tag:\ngit push origin :refs/heads/branch-or-tag-name\nSimilarly, to delete the tag instead of the branch you would use:\ngit push origin :refs/tags/branch-or-tag-name\n"},{"upvotes":63,"author":"unimplemented","content":"63\nMany of the other answers will lead to errors/warnings. This approach is relatively foolproof although you may still need git branch -D branch_to_delete if it's not fully merged into some_other_branch, for example.\ngit checkout some_other_branch\ngit push origin :branch_to_delete\ngit branch -d branch_to_delete\nRemote pruning isn't needed if you deleted the remote branch. 
It's only used to get the most up-to-date remote-tracking branches for a repository you're tracking. I've observed git fetch will add remote-tracking branches, not remove them. Here's an example of when git remote prune origin will actually do something:\nUser A does the steps above. User B would run the following commands to see the most up-to-date remote branches:\ngit fetch\ngit remote prune origin\ngit branch -r\n"},{"upvotes":63,"author":"unimplemented","content":"63\nAccording to the latest documentation, using a terminal we can delete a branch in the following way.\nDelete in local:\ngit branch -D usermanagement\nDelete in remote location:\ngit push --delete origin usermanagement\n"},{"upvotes":61,"author":"unimplemented","content":"61\nI got sick of googling for this answer, so I took a similar approach to the answer that crizCraig posted earlier.\nI added the following to my Bash profile:\nfunction gitdelete(){\n git push origin --delete $1\n git branch -D $1\n}\nThen every time I'm done with a branch (merged into master, for example) I run the following in my terminal:\ngitdelete my-branch-name\n...which then deletes my-branch-name from origin as well as locally.\n"},{"upvotes":59,"author":"unimplemented","content":"59\nUse:\ngit push origin :bugfix # Deletes remote branch\ngit branch -d bugfix # Must delete local branch manually\nIf you are sure you want to delete it, run\ngit branch -D bugfix\nNow to clean up deleted remote branches run\ngit remote prune origin\n"},{"upvotes":11567,"author":"unimplemented","content":"11567\nIn the simplest terms, git pull does a git fetch followed by a git merge.\ngit fetch updates your remote-tracking branches under refs/remotes/<remote>/. This operation is safe to run at any time since it never changes any of your local branches under refs/heads.\ngit pull brings a local branch up-to-date with its remote version, while also updating your other remote-tracking branches.\nFrom the Git documentation for git pull:\ngit pull runs git fetch with the given parameters and then depending on configuration options or command line flags, will call either git rebase or git merge to reconcile diverging branches.\n"},{"upvotes":2636,"author":"unimplemented","content":"2636\ngit pull tries to automatically merge after fetching commits. It is context sensitive, so all pulled commits will be merged into your currently active branch. git pull automatically merges the commits without letting you review them first. If you don’t carefully manage your branches, you may run into frequent conflicts.\ngit fetch gathers any commits from the target branch that do not exist in the current branch and stores them in your local repository. However, it does not merge them with your current branch. This is particularly useful if you need to keep your repository up to date, but are working on something that might break if you update your files. To integrate the commits into your current branch, you must use git merge afterwards.\n"},{"upvotes":1429,"author":"unimplemented","content":"1429\nIt is important to contrast the design philosophy of git with the philosophy of a more traditional source control tool like SVN.\nSubversion was designed and built with a client/server model. There is a single repository that is the server, and several clients can fetch code from the server, work on it, then commit it back to the server. 
The assumption is that the client can always contact the server when it needs to perform an operation.\nGit was designed to support a more distributed model with no need for a central repository (though you can certainly use one if you like). Also git was designed so that the client and the \"server\" don't need to be online at the same time. Git was designed so that people on an unreliable link could exchange code via email, even. It is possible to work completely disconnected and burn a CD to exchange code via git.\nIn order to support this model git maintains a local repository with your code and also an additional local repository that mirrors the state of the remote repository. By keeping a copy of the remote repository locally, git can figure out the changes needed even when the remote repository is not reachable. Later when you need to send the changes to someone else, git can transfer them as a set of changes from a point in time known to the remote repository.\ngit fetch is the command that says \"bring my local copy of the remote repository up to date.\"\ngit pull says \"bring the changes in the remote repository to where I keep my own code.\"\nNormally git pull does this by doing a git fetch to bring the local copy of the remote repository up to date, and then merging the changes into your own code repository and possibly your working copy.\nThe takeaway is to keep in mind that there are often at least three copies of a project on your workstation. One copy is your own repository with your own commit history. The second copy is your working copy where you are editing and building. The third copy is your local \"cached\" copy of a remote repository.\n"},{"upvotes":1165,"author":"unimplemented","content":"1165\nHere is Oliver Steele's image of how it all fits together:\n"},{"upvotes":596,"author":"unimplemented","content":"596\nOne use case of git fetch is that the following will tell you any changes in the remote branch since your last pull... so you can check before doing an actual pull, which could change files in your current branch and working copy.\ngit fetch\ngit diff ...origin\nSee the git diff documentation regarding the double-dot .. and triple-dot ... syntax.\n"},{"upvotes":434,"author":"unimplemented","content":"434\nIt took me a little while to understand what the difference was, but this is a simple explanation. master in your localhost is a branch.\nWhen you clone a repository you fetch the entire repository to your local host. This means that at that time you have an origin/master pointer to HEAD and master pointing to the same HEAD.\nWhen you start working and do commits you advance the master pointer to HEAD + your commits. But the origin/master pointer is still pointing to what it was when you cloned.\nSo the difference will be:\nIf you do a git fetch it will just fetch all the changes in the remote repository (GitHub) and move the origin/master pointer to HEAD. Meanwhile your local branch master will keep pointing to where it was.\nIf you do a git pull, it will do basically fetch (as explained previously) and merge any new changes to your master branch and move the pointer to HEAD.\n"},{"upvotes":302,"author":"unimplemented","content":"302\nSometimes a visual representation helps.\n"},{"upvotes":302,"author":"unimplemented","content":"302\nEven more briefly\ngit fetch fetches updates but does not merge them.\ngit pull does a git fetch under the hood and then a merge.\nBriefly\ngit fetch is similar to pull but doesn't merge. i.e. 
it fetches remote updates (refs and objects) but your local stays the same (i.e. origin/master gets updated but master stays the same).\ngit pull pulls down from a remote and instantly merges.\nMore\ngit clone clones a repo.\ngit rebase saves stuff from your current branch that isn't in the upstream branch to a temporary area. Your branch is now the same as before you started your changes. So, git pull --rebase will pull down the remote changes, rewind your local branch, and replay your changes over the top of your current branch one by one until you're up-to-date.\nAlso, git branch -a will show you exactly what’s going on with all your branches - local and remote.\nThis blog post was useful:\nThe difference between git pull, git fetch and git clone (and git rebase) - Mike Pearce\nand covers git pull, git fetch, git clone and git rebase.\nUPDATE\nI thought I'd update this to show how you'd actually use this in practice.\nUpdate your local repo from the remote (but don't merge):\n git fetch \nAfter downloading the updates, let's see the differences:\n git diff master origin/master \nIf you're happy with those updates, then merge:\n git pull\nNotes:\nOn step 2: For more on diffs between local and remotes, see: How to compare a local Git branch with its remote branch\nOn step 3: It's probably more accurate (e.g. on a fast changing repo) to do a git rebase origin here. See @Justin Ohms comment in another answer.\nSee also: http://longair.net/blog/2009/04/16/git-fetch-and-merge/\nNote also: I've mentioned a merge during a pull; however, you can configure a pull to use a rebase instead.\n"},{"upvotes":208,"author":"unimplemented","content":"208\ngit-pull - Fetch from and merge with another repository or a local branch\nSYNOPSIS\n\ngit pull <options> <repository> <refspec>…\nDESCRIPTION\n\nRuns git-fetch with the given parameters, and calls git-merge to merge the \nretrieved head(s) into the current branch. With --rebase, calls git-rebase \ninstead of git-merge.\n\nNote that you can use . (current directory) as the <repository> to pull \nfrom the local repository — this is useful when merging local branches \ninto the current branch.\n\nAlso note that options meant for git-pull itself and underlying git-merge \nmust be given before the options meant for git-fetch.\nYou would pull if you want the histories merged, you'd fetch if you just 'want the codez' as some person has been tagging some articles around here.\n"},{"upvotes":191,"author":"unimplemented","content":"191\nOK, here is some information about git pull and git fetch, so you can understand the actual differences... in a few simple words, fetch gets the latest data, but not the code changes, and it is not going to mess with your current local branch code, while pull gets the code changes and merges them into your local branch straight away; read on to get more details about each:\ngit fetch\nIt will download all refs and objects and any new branches to your local repository...\nFetch branches and/or tags (collectively, \"refs\") from one or more other repositories, along with the objects necessary to complete their histories. Remote-tracking branches are updated (see the description of <refspec> below for ways to control this behavior).\nBy default, any tag that points into the histories being fetched is also fetched; the effect is to fetch tags that point at branches that you are interested in. This default behavior can be changed by using the --tags or --no-tags options or by configuring remote.<name>.tagOpt. 
By using a refspec that fetches tags explicitly, you can fetch tags that do not point into branches you are interested in as well.\ngit fetch can fetch from either a single named repository or URL or from several repositories at once if <group> is given and there is a remotes.<group> entry in the configuration file. (See git-config(1).)\nWhen no remote is specified, by default the origin remote will be used, unless there’s an upstream branch configured for the current branch.\nThe names of refs that are fetched, together with the object names they point at, are written to .git/FETCH_HEAD. This information may be used by scripts or other git commands, such as git-pull.\ngit pull\nIt will apply the changes from the remote to the current local branch...\nIncorporates changes from a remote repository into the current branch. In its default mode, git pull is shorthand for git fetch followed by git merge FETCH_HEAD.\nMore precisely, git pull runs git fetch with the given parameters and calls git merge to merge the retrieved branch heads into the current branch. With --rebase, it runs git rebase instead of git merge.\n<repository> should be the name of a remote repository as passed to git-fetch(1). <refspec> can name an arbitrary remote ref (for example, the name of a tag) or even a collection of refs with corresponding remote-tracking branches (e.g., refs/heads/*:refs/remotes/origin/*), but usually it is the name of a branch in the remote repository.\nDefault values for <repository> and <branch> are read from the \"remote\" and \"merge\" configuration for the current branch as set by git-branch --track.\nI also created the visual below to show you how git fetch and git pull work together...\n"},{"upvotes":183,"author":"unimplemented","content":"183\nThe short and easy answer is that git pull is simply git fetch followed by git merge.\nIt is very important to note that git pull will automatically merge whether you like it or not. This could, of course, result in merge conflicts. Let's say your remote is origin and your branch is master. If you git diff origin/master before pulling, you should have some idea of potential merge conflicts and could prepare your local branch accordingly.\nIn addition to pulling and pushing, some workflows involve git rebase, such as this one, which I paraphrase from the linked article:\ngit pull origin master\ngit checkout foo-branch\ngit rebase master\ngit push origin foo-branch\nIf you find yourself in such a situation, you may be tempted to git pull --rebase. Unless you really, really know what you are doing, I would advise against that. This warning is from the man page for git-pull, version 2.3.5:\nThis is a potentially dangerous mode of operation. It rewrites history, which does not bode well when you published that history already. Do not use this option unless you have read git-rebase(1) carefully.\n"},{"upvotes":182,"author":"unimplemented","content":"182\nYou can fetch from a remote repository, see the differences and then pull or merge.\nThis is an example for a remote repository called origin and a branch called master tracking the remote branch origin/master:\ngit checkout master \ngit fetch \ngit diff origin/master\ngit rebase origin master\n"},{"upvotes":161,"author":"unimplemented","content":"161\nThis interactive graphical representation is very helpful in understanding git: http://ndpsoftware.com/git-cheatsheet.html\ngit fetch just \"downloads\" the changes from the remote to your local repository. git pull downloads the changes and merges them into your current branch. 
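Spelled out as commands, that equivalence looks like this (a sketch; the branch name master is an assumption for illustration):\ngit pull origin master\n# is roughly equivalent to:\ngit fetch origin master\ngit merge FETCH_HEAD\nThis is exactly what the documentation quote below says.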
\"In its default mode, git pull is shorthand for git fetch followed by git merge FETCH_HEAD.\"\n"},{"upvotes":151,"author":"unimplemented","content":"151\nBonus:\nIn speaking of pull & fetch in the above answers, I would like to share an interesting trick,\ngit pull --rebase\nThis above command is the most useful command in my git life which saved a lots of time.\nBefore pushing your new commits to server, try this command and it will automatically sync latest server changes (with a fetch + merge) and will place your commit at the top in git log. No need to worry about manual pull/merge.\nFind details at: http://gitolite.com/git-pull--rebase\n"},{"upvotes":134,"author":"unimplemented","content":"134\nI like to have some visual representation of the situation to grasp these things. Maybe other developers would like to see it too, so here's my addition. I'm not totally sure that it all is correct, so please comment if you find any mistakes.\n LOCAL SYSTEM\n . ===================================================== \n================= . ================= =================== =============\nREMOTE REPOSITORY . REMOTE REPOSITORY LOCAL REPOSITORY WORKING COPY\n(ORIGIN) . (CACHED) \nfor example, . mirror of the \na github repo. . remote repo\nCan also be .\nmultiple repo's .\n .\n .\nFETCH *------------------>*\nYour local cache of the remote is updated with the origin (or multiple\nexternal sources, that is git's distributed nature)\n .\nPULL *-------------------------------------------------------->*\nchanges are merged directly into your local copy. when conflicts occur, \nyou are asked for decisions.\n .\nCOMMIT . *<---------------*\nWhen coming from, for example, subversion, you might think that a commit\nwill update the origin. In git, a commit is only done to your local repo.\n .\nPUSH *<---------------------------------------*\nSynchronizes your changes back into the origin.\nSome major advantages for having a fetched mirror of the remote are:\nPerformance (scroll through all commits and messages without trying to squeeze it through the network)\nFeedback about the state of your local repo (for example, I use Atlassian's SourceTree, which will give me a bulb indicating if I'm commits ahead or behind compared to the origin. This information can be updated with a GIT FETCH).\n"},{"upvotes":128,"author":"unimplemented","content":"128\nThe Difference between GIT Fetch and GIT Pull can be explained with the following scenario: (Keeping in mind that pictures speak louder than words!, I have provided pictorial representation)\nLet's take an example that you are working on a project with your team members. 
So there will be one main branch of the project, and all the contributors must fork it to their own local repository, work on this local branch to modify/add modules, and then push back to the main branch.\nSo, the initial state of the two branches when you forked the main project into your local repository will be like this (A, B and C are modules of the project that are already completed):\nNow, you have started working on a new module (suppose D), and when you have completed the D module you want to push it to the main branch. But meanwhile one of your teammates has developed new modules E and F and modified C.\nSo now what has happened is that your local repository is lagging behind the original progress of the project, and thus pushing your changes to the main branch can lead to conflicts and may cause your module D to malfunction.\nTo avoid such issues and to work in parallel with the original progress of the project, there are two ways:\n1. Git fetch - This will download all the changes that have been made to the origin/main branch project which are not present in your local branch. It waits for the git merge command to apply the changes that have been fetched to your repository or branch.\nSo now you can carefully review the files before merging them into your repository, and you can also modify D if required because of the modified C.\n2. Git pull - This will update your local branch with the origin/main branch, i.e., what it actually does is a combination of git fetch and git merge, one after another. But this may cause conflicts, so it’s recommended to use git pull with a clean copy.\n"},{"upvotes":118,"author":"unimplemented","content":"118\nIn simple terms, if you were about to hop onto a plane without any Internet connection… before departing you could just do git fetch origin. It would fetch all the changes into your computer, but keep them separate from your local development/workspace.\nOn the plane, you could make changes to your local workspace and then merge them with what you've previously fetched, and then resolve potential merge conflicts, all without a connection to the Internet. And unless someone had made new changes to the remote repository, then upon arriving at the destination you would do git push origin and go get your coffee.\nFrom this awesome Atlassian tutorial:\nThe git fetch command downloads commits, files, and refs from a remote repository into your local repository.\nFetching is what you do when you want to see what everybody else has been working on. It’s similar to SVN update in that it lets you see how the central history has progressed, but it doesn’t force you to actually merge the changes into your repository. Git isolates fetched content from existing local content; it has absolutely no effect on your local development work. Fetched content has to be explicitly checked out using the git checkout command. This makes fetching a safe way to review commits before integrating them with your local repository.\nWhen downloading content from a remote repository, the git pull and git fetch commands are available to accomplish the task. You can consider git fetch the 'safe' version of the two commands. It will download the remote content, but not update your local repository's working state, leaving your current work intact. git pull is the more aggressive alternative; it will download the remote content for the active local branch and immediately execute git merge to create a merge commit for the new remote content.
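(A rough sketch with placeholder names: the safe route is git fetch origin, then git log HEAD..origin/master to review what arrived, then git merge origin/master to integrate, whereas git pull origin master collapses all of that into one immediate step.)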
If you have pending changes in progress, this will cause conflicts and kick off the merge conflict resolution flow.\nWith git pull:\nYou don't get any isolation.\nIt doesn't need to be explicitly checked out, because it implicitly does a git merge.\nThe merging step will affect your local development and may cause conflicts.\nIt's basically NOT safe. It's aggressive.\nUnlike git fetch, which only affects your .git/refs/remotes, git pull will affect both your .git/refs/remotes and .git/refs/heads/\nHmmm...so if I'm not updating the working copy with git fetch, then where am I making changes? Where does Git fetch store the new commits?\nGreat question. First and foremost, the heads or remotes don't store the new commits. They just have pointers to commits. So with git fetch you download the latest git objects (blobs, trees, commits; to fully understand the objects, watch this video on git internals), but only update your remotes pointer to point to the latest commit of that branch. It's still isolated from your working copy, because your branch's pointer in the heads directory hasn't been updated. It will only be updated upon a merge/pull. But again, where? Let's find out.\nIn your project directory (i.e., where you do your git commands) do:\nls. This will show the files & directories. Nothing cool, I know.\nNow do ls -a. This will show dot files, i.e., files beginning with . You will then be able to see a directory named: .git.\nDo cd .git. This will obviously change your directory.\nNow comes the fun part; do ls. You will see a list of directories. We're looking for refs. Do cd refs.\nIt's interesting to see what's inside all directories, but let's focus on two of them: heads and remotes. Use cd to check inside them too.\nAny git fetch that you do will update the pointer in the /.git/refs/remotes directory. It won't update anything in the /.git/refs/heads directory.\nAny git pull will first do the git fetch and update items in the /.git/refs/remotes directory. It will then also merge with your local branch and then change the head inside the /.git/refs/heads directory.\nA very good related answer can also be found in Where does 'git fetch' place itself?.\nAlso, look for \"Slash notation\" from the Git branch naming conventions post. It helps you better understand how Git places things in different directories.\nTo see the actual difference\nJust do:\ngit fetch origin master\ngit checkout master\nIf the remote master was updated you'll get a message like this:\nYour branch is behind 'origin/master' by 2 commits, and can be fast-forwarded.\n (use \"git pull\" to update your local branch)\nIf you didn't fetch and just did git checkout master then your local git wouldn't know that there are 2 commits added. And it would just say:\nAlready on 'master'\nYour branch is up to date with 'origin/master'.\nBut that's outdated and incorrect. It's because git will give you feedback solely based on what it knows. It's oblivious to new commits that it hasn't pulled down yet...\nIs there any way to see the new changes made in the remote while working on the branch locally?\nSome IDEs (e.g. Xcode) are super smart and use the result of a git fetch and can annotate the lines of code that have been changed in the remote branch of your current working branch. If a line has been changed by both local changes and the remote branch, then that line gets annotated with red. This isn't a merge conflict. It's a potential merge conflict.
It's a heads-up that you can use to resolve the future merge conflict before doing git pull from the remote branch.\nFun tip:\nIf you fetched a remote branch, e.g. did:\ngit fetch origin feature/123\nThen this would go into your remotes directory. It's still not available as a local branch. However, it simplifies your checkout to that remote branch by DWIM (Do what I mean):\ngit checkout feature/123\nYou no longer need to do:\ngit checkout -b feature/123 origin/feature/123\nFor more on that read here\n"},{"upvotes":115,"author":"unimplemented","content":"115\nI have struggled with this as well. In fact I got here with a google search of exactly the same question. Reading all these answers finally painted a picture in my head, and I decided to try to get this down by looking at the state of the two repositories and one sandbox, and the actions performed over time, while watching the version of each. So here is what I came up with. Please correct me if I messed up anywhere.\nThe three repos with a fetch:\n--------------------- ----------------------- -----------------------\n- Remote Repo - - Remote Repo - - Remote Repo -\n- - - gets pushed - - -\n- @ R01 - - @ R02 - - @ R02 -\n--------------------- ----------------------- -----------------------\n\n--------------------- ----------------------- -----------------------\n- Local Repo - - Local Repo - - Local Repo -\n- pull - - - - fetch -\n- @ R01 - - @ R01 - - @ R02 -\n--------------------- ----------------------- -----------------------\n\n--------------------- ----------------------- -----------------------\n- Local Sandbox - - Local Sandbox - - Local Sandbox -\n- Checkout - - new work done - - -\n- @ R01 - - @ R01+ - - @R01+ -\n--------------------- ----------------------- -----------------------\nThe three repos with a pull:\n--------------------- ----------------------- -----------------------\n- Remote Repo - - Remote Repo - - Remote Repo -\n- - - gets pushed - - -\n- @ R01 - - @ R02 - - @ R02 -\n--------------------- ----------------------- -----------------------\n\n--------------------- ----------------------- -----------------------\n- Local Repo - - Local Repo - - Local Repo -\n- pull - - - - pull -\n- @ R01 - - @ R01 - - @ R02 -\n--------------------- ----------------------- -----------------------\n\n--------------------- ----------------------- -----------------------\n- Local Sandbox - - Local Sandbox - - Local Sandbox -\n- Checkout - - new work done - - merged with R02 -\n- @ R01 - - @ R01+ - - @R02+ -\n--------------------- ----------------------- -----------------------\nThis helped me understand why a fetch is pretty important.\n"},{"upvotes":101,"author":"unimplemented","content":"101\nWe simply say:\ngit pull == git fetch + git merge\nIf you run git pull, you do not need to merge the data into your local branch yourself. If you run git fetch, you must run git merge to get the latest code into your local branch. Otherwise, your local code would not change without a merge.\nSo in the Git Gui, when you do a fetch, you have to merge the data. Fetch by itself won't change the code on your machine. You can verify this: fetch once and look at the code; it won't have changed. Then you merge... You will see the changed code.\n"},{"upvotes":90,"author":"unimplemented","content":"90\ngit fetch pulls down the code from the remote server to your tracking branches in your local repository.
If your remote is named origin (the default) then these branches will be within origin/, for example origin/master, origin/mybranch-123, etc. These are not your current branches; they are local copies of those branches from the server.\ngit pull does a git fetch but then also merges the code from the tracking branch into your current local version of that branch. If you're not ready for those changes yet, just git fetch first.\n"},{"upvotes":85,"author":"unimplemented","content":"85\ngit fetch will retrieve remote branches so that you can git diff or git merge them with the current branch. git pull will run fetch on the remote branch tracked by the current branch and then merge the result. You can use git fetch to see if there are any updates to the remote branch without necessarily merging them with your local branch.\n"},{"upvotes":83,"author":"unimplemented","content":"83\nGit Fetch\nYou download changes to your local branch from origin through fetch. Fetch asks the remote repo for all commits that others have made but you don't have in your local repo. Fetch downloads these commits and adds them to the local repository.\nGit Merge\nYou can apply changes downloaded through fetch using the merge command. Merge will take the commits retrieved from fetch and try to add them to your local branch. The merge will keep the commit history of your local changes so that when you share your branch with push, Git will know how others can merge your changes.\nGit Pull\nFetch and merge run together often enough that a command that combines the two, pull, was created. Pull does a fetch and then a merge to add the downloaded commits into your local branch.\n"},{"upvotes":57,"author":"unimplemented","content":"57\nThe only difference between git pull and git fetch is that:\ngit pull pulls from a remote branch and merges it.\ngit fetch only fetches from the remote branch but does not merge.\ni.e. git pull = git fetch + git merge ...\n"},{"upvotes":49,"author":"unimplemented","content":"49\nThe git pull command is actually a shortcut for git fetch followed by the git merge or the git rebase command, depending on your configuration. You can configure your Git repository so that git pull is a fetch followed by a rebase.\n"},{"upvotes":48,"author":"unimplemented","content":"48\nGit allows chronologically older commits to be applied after newer commits. Because of this, the act of transferring commits between repositories is split into two steps:\nCopying new commits from the remote branch to a copy of this remote branch inside the local repo.\n(repo to repo operation) master@remote >> remote/origin/master@local\nIntegrating new commits into the local branch\n(inside-repo operation) remote/origin/master@local >> master@local\nThere are two ways of doing step 2. You can:\nFork the local branch after the last common ancestor and add new commits parallel to the commits which are unique to the local repository, finalized by a merge commit, closing the fork.\nInsert new commits after the last common ancestor and reapply the commits unique to the local repository.\nIn git terminology, step 1 is git fetch; step 2 is git merge or git rebase.\ngit pull is git fetch and git merge.\n"},{"upvotes":41,"author":"unimplemented","content":"41\nWhat is the difference between git pull and git fetch?\nTo understand this, you first need to understand that your local git maintains not only your local repository, but also a local copy of the remote repository.\ngit fetch brings your local copy of the remote repository up to date.
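(In command form, a minimal sketch assuming the default remote name: git fetch origin refreshes only the remote-tracking refs under refs/remotes/origin/ and leaves your local branches alone.)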
For example, if your remote repository is GitHub - you may want to fetch any changes made in the remote repository to your local copy of the remote repository. This will allow you to perform operations such as compare or merge.\ngit pull on the other hand will bring down the changes in the remote repository to where you keep your own code. Typically, git pull will do a git fetch first to bring the local copy of the remote repository up to date, and then it will merge the changes into your own code repository and possibly your working copy.\n"},{"upvotes":40,"author":"unimplemented","content":"40\nGit obtains the branch of the latest version from the remote to the local using two commands:\ngit fetch: Git is going to get the latest version from the remote to the local, but it does not automatically merge.\ngit fetch origin master\ngit log -p master..origin/master\ngit merge origin/master\n The commands above download the latest version of the master branch from the remote origin into the origin/master branch, then compare the local master branch and the origin/master branch, and finally merge.\ngit pull: Git is going to get the latest version from the remote and merge it into the local branch.\n git pull origin master\n The command above is equivalent to git fetch plus git merge. In practice, git fetch may be safer, because before the merge we can see the changes and decide whether to merge.\n"},{"upvotes":39,"author":"unimplemented","content":"39\nA simple graphical representation for beginners:\ngit pull\nwill fetch code from the repository and rebase it with your local copy... with git pull there is a possibility of new commits getting created.\nBut with\ngit fetch\nyou fetch code from the repository, and you need to rebase it manually using git rebase.\nE.g.: I am going to fetch from the server's master and rebase it onto my local master.\n1) git pull (rebase will be done automatically):\ngit pull origin master\nHere origin is your remote repo and master is your branch.\n2) git fetch (need to rebase manually):\ngit fetch origin master\nIt will fetch the server changes from origin, and they will sit in your local repository until you rebase them on your own. We need to fix conflicts manually by checking the code.\ngit rebase origin/master\nThis will rebase the code into your local branch. Before that, ensure you're on the right branch.\n"},{"upvotes":38,"author":"unimplemented","content":"38\nActually Git maintains a copy of your own code and the remote repository.\nThe command git fetch makes your local copy up to date by getting data from the remote repository. The reason we need this is that somebody else might have made some changes to the code and you want to keep yourself updated.\nThe command git pull brings the changes in the remote repository to where you keep your own code. Normally, git pull does this by doing a ‘git fetch’ first to bring the local copy of the remote repository up to date, and then it merges the changes into your own code repository and possibly your working copy.\n"},{"upvotes":37,"author":"unimplemented","content":"37\ngit pull == (git fetch + git merge)\ngit fetch does not make changes to local branches.\nIf you already have a local repository with a remote set up for the desired project, you can grab all branches and tags for the existing remote using git fetch <remote>. ... Fetch does not make any changes to local branches, so you will need to merge a remote branch with a paired local branch to incorporate newly fetched changes.
from github\n"},{"upvotes":18079,"author":"unimplemented","content":"18079\n+600\nTo understand what yield does, you must understand what generators are. And before you can understand generators, you must understand iterables.\nIterables\nWhen you create a list, you can read its items one by one. Reading its items one by one is called iteration:\n>>> mylist = [1, 2, 3]\n>>> for i in mylist:\n... print(i)\n1\n2\n3\nmylist is an iterable. When you use a list comprehension, you create a list, and so an iterable:\n>>> mylist = [x*x for x in range(3)]\n>>> for i in mylist:\n... print(i)\n0\n1\n4\nEverything you can use \"for... in...\" on is an iterable; lists, strings, files...\nThese iterables are handy because you can read them as much as you wish, but you store all the values in memory and this is not always what you want when you have a lot of values.\nGenerators\nGenerators are iterators, a kind of iterable you can only iterate over once. Generators do not store all the values in memory; they generate the values on the fly:\n>>> mygenerator = (x*x for x in range(3))\n>>> for i in mygenerator:\n... print(i)\n0\n1\n4\nIt is just the same except you used () instead of []. BUT, you cannot perform for i in mygenerator a second time, since generators can only be used once: they calculate 0, then forget about it and calculate 1, and end after calculating 4, one by one.\nYield\nyield is a keyword that is used like return, except the function will return a generator.\n>>> def create_generator():\n... mylist = range(3)\n... for i in mylist:\n... yield i*i\n...\n>>> mygenerator = create_generator() # create a generator\n>>> print(mygenerator) # mygenerator is an object!\n<generator object create_generator at 0x...>\n>>> for i in mygenerator:\n... print(i)\n0\n1\n4\nHere it's a useless example, but it's handy when you know your function will return a huge set of values that you will only need to read once.\nTo master yield, you must understand that when you call the function, the code you have written in the function body does not run. The function only returns the generator object; this is a bit tricky.\nThen, your code will continue from where it left off each time for uses the generator.\nNow the hard part:\nThe first time the for calls the generator object created from your function, it will run the code in your function from the beginning until it hits yield, then it'll return the first value of the loop. Then, each subsequent call will run another iteration of the loop you have written in the function and return the next value. This will continue until the generator is considered empty, which happens when the function runs without hitting yield.
That can be because the loop has come to an end, or because you no longer satisfy an \"if/else\".\nYour code explained\nGenerator:\n# Here you create the method of the node object that will return the generator\ndef _get_child_candidates(self, distance, min_dist, max_dist):\n\n # Here is the code that will be called each time you use the generator object:\n\n # If there is still a child of the node object on its left\n # AND if the distance is ok, return the next child\n if self._leftchild and distance - max_dist < self._median:\n yield self._leftchild\n\n # If there is still a child of the node object on its right\n # AND if the distance is ok, return the next child\n if self._rightchild and distance + max_dist >= self._median:\n yield self._rightchild\n\n # If the function arrives here, the generator will be considered empty\n # There are no more than two values: the left and the right children\nCaller:\n# Create an empty list and a list with the current object reference\nresult, candidates = list(), [self]\n\n# Loop on candidates (they contain only one element at the beginning)\nwhile candidates:\n\n # Get the last candidate and remove it from the list\n node = candidates.pop()\n\n # Get the distance between obj and the candidate\n distance = node._get_dist(obj)\n\n # If the distance is ok, then you can fill in the result\n if distance <= max_dist and distance >= min_dist:\n result.extend(node._values)\n\n # Add the children of the candidate to the candidate's list\n # so the loop will keep running until it has looked\n # at all the children of the children of the children, etc. of the candidate\n candidates.extend(node._get_child_candidates(distance, min_dist, max_dist))\n\nreturn result\nThis code contains several smart parts:\nThe loop iterates on a list, but the list expands while the loop is being iterated. It's a concise way to go through all these nested data even if it's a bit dangerous since you can end up with an infinite loop. In this case, candidates.extend(node._get_child_candidates(distance, min_dist, max_dist)) exhausts all the values of the generator, but while keeps creating new generator objects which will produce different values from the previous ones since it's not applied on the same node.\nThe extend() method is a list object method that expects an iterable and adds its values to the list.\nUsually, we pass a list to it:\n>>> a = [1, 2]\n>>> b = [3, 4]\n>>> a.extend(b)\n>>> print(a)\n[1, 2, 3, 4]\nBut in your code, it gets a generator, which is good because:\nYou don't need to read the values twice.\nYou may have a lot of children and you don't want them all stored in memory.\nAnd it works because Python does not care if the argument of a method is a list or not. Python expects iterables so it will work with strings, lists, tuples, and generators! This is called duck typing and is one of the reasons why Python is so cool. But this is another story, for another question...\nYou can stop here, or read a little bit to see an advanced use of a generator:\nControlling a generator exhaustion\n>>> class Bank(): # Let's create a bank, building ATMs\n... crisis = False\n... def create_atm(self):\n... while not self.crisis:\n... 
yield \"$100\"\n>>> hsbc = Bank() # When everything's ok the ATM gives you as much as you want\n>>> corner_street_atm = hsbc.create_atm()\n>>> print(corner_street_atm.next())\n$100\n>>> print(corner_street_atm.next())\n$100\n>>> print([corner_street_atm.next() for cash in range(5)])\n['$100', '$100', '$100', '$100', '$100']\n>>> hsbc.crisis = True # Crisis is coming, no more money!\n>>> print(corner_street_atm.next())\n\n>>> wall_street_atm = hsbc.create_atm() # It's even true for new ATMs\n>>> print(wall_street_atm.next())\n\n>>> hsbc.crisis = False # The trouble is, even post-crisis the ATM remains empty\n>>> print(corner_street_atm.next())\n\n>>> brand_new_atm = hsbc.create_atm() # Build a new one to get back in business\n>>> for cash in brand_new_atm:\n... print cash\n$100\n$100\n$100\n$100\n$100\n$100\n$100\n$100\n$100\n...\nNote: For Python 3, useprint(corner_street_atm.__next__()) or print(next(corner_street_atm))\nIt can be useful for various things like controlling access to a resource.\nItertools, your best friend\nThe itertools module contains special functions to manipulate iterables. Ever wish to duplicate a generator? Chain two generators? Group values in a nested list with a one-liner? Map / Zip without creating another list?\nThen just import itertools.\nAn example? Let's see the possible orders of arrival for a four-horse race:\n>>> horses = [1, 2, 3, 4]\n>>> races = itertools.permutations(horses)\n>>> print(races)\n\n>>> print(list(itertools.permutations(horses)))\n[(1, 2, 3, 4),\n (1, 2, 4, 3),\n (1, 3, 2, 4),\n (1, 3, 4, 2),\n (1, 4, 2, 3),\n (1, 4, 3, 2),\n (2, 1, 3, 4),\n (2, 1, 4, 3),\n (2, 3, 1, 4),\n (2, 3, 4, 1),\n (2, 4, 1, 3),\n (2, 4, 3, 1),\n (3, 1, 2, 4),\n (3, 1, 4, 2),\n (3, 2, 1, 4),\n (3, 2, 4, 1),\n (3, 4, 1, 2),\n (3, 4, 2, 1),\n (4, 1, 2, 3),\n (4, 1, 3, 2),\n (4, 2, 1, 3),\n (4, 2, 3, 1),\n (4, 3, 1, 2),\n (4, 3, 2, 1)]\nUnderstanding the inner mechanisms of iteration\nIteration is a process implying iterables (implementing the __iter__() method) and iterators (implementing the __next__() method). Iterables are any objects you can get an iterator from. Iterators are objects that let you iterate on iterables.\nThere is more about it in this article about how for loops work.\n"},{"upvotes":2562,"author":"unimplemented","content":"2562\nShortcut to understanding yield\nWhen you see a function with yield statements, apply this easy trick to understand what will happen:\nInsert a line result = [] at the start of the function.\nReplace each yield expr with result.append(expr).\nInsert a line return result at the bottom of the function.\nYay - no more yield statements! Read and figure out the code.\nCompare the function to the original definition.\nThis trick may give you an idea of the logic behind the function, but what actually happens with yield is significantly different than what happens in the list-based approach. In many cases, the yield approach will be a lot more memory efficient and faster too. In other cases, this trick will get you stuck in an infinite loop, even though the original function works just fine. 
Read on to learn more...\nDon't confuse your iterables, iterators, and generators\nFirst, the iterator protocol - when you write\nfor x in mylist:\n ...loop body...\nPython performs the following two steps:\nGets an iterator for mylist:\nCall iter(mylist) -> this returns an object with a next() method (or __next__() in Python 3).\n[This is the step most people forget to tell you about]\nUses the iterator to loop over items:\nKeep calling the next() method on the iterator returned from step 1. The return value from next() is assigned to x and the loop body is executed. If an exception StopIteration is raised from within next(), it means there are no more values in the iterator and the loop is exited.\nThe truth is Python performs the above two steps anytime it wants to loop over the contents of an object - so it could be a for loop, but it could also be code like otherlist.extend(mylist) (where otherlist is a Python list).\nHere mylist is an iterable because it implements the iterator protocol. In a user-defined class, you can implement the __iter__() method to make instances of your class iterable. This method should return an iterator. An iterator is an object with a next() method. It is possible to implement both __iter__() and next() on the same class, and have __iter__() return self. This will work for simple cases, but not when you want two iterators looping over the same object at the same time.\nSo that's the iterator protocol, many objects implement this protocol:\nBuilt-in lists, dictionaries, tuples, sets, and files.\nUser-defined classes that implement __iter__().\nGenerators.\nNote that a for loop doesn't know what kind of object it's dealing with - it just follows the iterator protocol, and is happy to get item after item as it calls next(). Built-in lists return their items one by one, dictionaries return the keys one by one, files return the lines one by one, etc. And generators return... well that's where yield comes in:\ndef f123():\n yield 1\n yield 2\n yield 3\n\nfor item in f123():\n print item\nInstead of yield statements, if you had three return statements in f123() only the first would get executed, and the function would exit. But f123() is no ordinary function. When f123() is called, it does not return any of the values in the yield statements! It returns a generator object. Also, the function does not really exit - it goes into a suspended state. When the for loop tries to loop over the generator object, the function resumes from its suspended state at the very next line after the yield it previously returned from, executes the next line of code, in this case, a yield statement, and returns that as the next item. This happens until the function exits, at which point the generator raises StopIteration, and the loop exits.\nSo the generator object is sort of like an adapter - at one end it exhibits the iterator protocol, by exposing __iter__() and next() methods to keep the for loop happy. At the other end, however, it runs the function just enough to get the next value out of it and puts it back in suspended mode.\nWhy use generators?\nUsually, you can write code that doesn't use generators but implements the same logic. One option is to use the temporary list 'trick' I mentioned before. That will not work in all cases, for e.g. if you have infinite loops, or it may make inefficient use of memory when you have a really long list. 
The other approach is to implement a new iterable class SomethingIter that keeps the state in instance members and performs the next logical step in its next() (or __next__() in Python 3) method. Depending on the logic, the code inside the next() method may end up looking very complex and prone to bugs. Here generators provide a clean and easy solution.\n"},{"upvotes":781,"author":"unimplemented","content":"781\nThink of it this way:\nAn iterator is just a fancy sounding term for an object that has a next() method. So a yield-ed function ends up being something like this:\nOriginal version:\ndef some_function():\n for i in xrange(4):\n yield i\n\nfor i in some_function():\n print i\nThis is basically what the Python interpreter does with the above code:\nclass it:\n def __init__(self):\n # Start at -1 so that we get 0 when we add 1 below.\n self.count = -1\n\n # The __iter__ method will be called once by the 'for' loop.\n # The rest of the magic happens on the object returned by this method.\n # In this case it is the object itself.\n def __iter__(self):\n return self\n\n # The next method will be called repeatedly by the 'for' loop\n # until it raises StopIteration.\n def next(self):\n self.count += 1\n if self.count < 4:\n return self.count\n else:\n # A StopIteration exception is raised\n # to signal that the iterator is done.\n # This is caught implicitly by the 'for' loop.\n raise StopIteration\n\ndef some_func():\n return it()\n\nfor i in some_func():\n print i\nFor more insight as to what's happening behind the scenes, the for loop can be rewritten to this:\niterator = some_func()\ntry:\n while 1:\n print iterator.next()\nexcept StopIteration:\n pass\nDoes that make more sense or just confuse you more? :)\nI should note that this is an oversimplification for illustrative purposes. :)\n"},{"upvotes":668,"author":"unimplemented","content":"668\nThe yield keyword is reduced to two simple facts:\nIf the compiler detects the yield keyword anywhere inside a function, that function no longer returns via the return statement. Instead, it immediately returns a lazy \"pending list\" object called a generator\nA generator is iterable. What is an iterable? It's anything like a list, set, range, dictionary view, or any other object with a built-in protocol for visiting each element in a certain order.\nIn a nutshell: Most commonly, a generator is a lazy, incrementally-pending list, and yield statements allow you to use function notation to program the list values the generator should incrementally spit out. Furthermore, advanced usage lets you use generators as coroutines (see below).\ngenerator = myYieldingFunction(...) # basically a list (but lazy)\nx = list(generator) # evaluate every element into a list\n\n generator\n v\n[x[0], ..., ???]\n\n generator\n v\n[x[0], x[1], ..., ???]\n\n generator\n v\n[x[0], x[1], x[2], ..., ???]\n\n StopIteration exception\n[x[0], x[1], x[2]] done\nBasically, whenever the yield statement is encountered, the function pauses and saves its state, then emits \"the next return value in the 'list'\" according to the python iterator protocol (to some syntactic construct like a for-loop that repeatedly calls next() and catches a StopIteration exception, etc.). You might have encountered generators with generator expressions; generator functions are more powerful because you can pass arguments back into the paused generator function, using them to implement coroutines. 
More on that later.\nBasic Example ('list')\nLet's define a function makeRange that's just like Python's range. Calling makeRange(n) RETURNS A GENERATOR:\ndef makeRange(n):\n # return 0,1,2,...,n-1\n i = 0\n while i < n:\n yield i\n i += 1\n\n>>> makeRange(5)\n<generator object makeRange at 0x...>\nTo force the generator to immediately return its pending values, you can pass it into list() (just like you could any iterable):\n>>> list(makeRange(5))\n[0, 1, 2, 3, 4]\nComparing the example to \"just returning a list\"\nThe above example can be thought of as merely creating a list that you append to and return:\n# return a list # # return a generator\ndef makeRange(n): # def makeRange(n):\n \"\"\"return [0,1,2,...,n-1]\"\"\" # \"\"\"return 0,1,2,...,n-1\"\"\"\n TO_RETURN = [] # \n i = 0 # i = 0\n while i < n: # while i < n:\n TO_RETURN += [i] # yield i\n i += 1 # i += 1\n return TO_RETURN # \n\n>>> makeRange(5)\n[0, 1, 2, 3, 4]\nThere is one major difference, though; see the last section.\nHow you might use generators\nAn iterable is the last part of a list comprehension, and all generators are iterable, so they're often used like so:\n# < ITERABLE >\n>>> [x+10 for x in makeRange(5)]\n[10, 11, 12, 13, 14]\nTo get a better feel for generators, you can play around with the itertools module (be sure to use chain.from_iterable rather than chain when warranted). For example, you might even use generators to implement infinitely-long lazy lists like itertools.count(). You could implement your own def enumerate(iterable): zip(count(), iterable), or alternatively do so with the yield keyword in a while-loop.\nPlease note: generators can actually be used for many more things, such as implementing coroutines, non-deterministic programming, and other elegant things. However, the \"lazy lists\" viewpoint I present here is the most common use you will find.\nBehind the scenes\nThis is how the \"Python iteration protocol\" works. That is, what is going on when you do list(makeRange(5)). This is what I described earlier as a \"lazy, incremental list\".\n>>> x=iter(range(5))\n>>> next(x) # calls x.__next__(); x.next() is deprecated\n0\n>>> next(x)\n1\n>>> next(x)\n2\n>>> next(x)\n3\n>>> next(x)\n4\n>>> next(x)\nTraceback (most recent call last):\n File \"<stdin>\", line 1, in <module>\nStopIteration\nThe built-in function next() just calls the object's .__next__() function, which is a part of the \"iteration protocol\" and is found on all iterators. You can manually use the next() function (and other parts of the iteration protocol) to implement fancy things, usually at the expense of readability, so try to avoid doing that...\nCoroutines\nCoroutine example:\ndef interactiveProcedure():\n userResponse = yield makeQuestionWebpage()\n print('user response:', userResponse)\n yield 'success'\n\ncoroutine = interactiveProcedure()\nwebFormData = next(coroutine) # same as .send(None)\nuserResponse = serveWebForm(webFormData)\n\n# ...at some point later on web form submit...\n\nsuccessStatus = coroutine.send(userResponse)\nA coroutine (generators that generally accept input via the yield keyword, e.g. nextInput = yield nextOutput, as a form of two-way communication) is basically a computation that is allowed to pause itself and request input (e.g., about what it should do next). When the coroutine pauses itself (when the running coroutine eventually hits a yield keyword), the computation is paused and control is inverted (yielded) back to the 'calling' function (the frame which requested the next value of the computation).
The paused generator/coroutine remains paused until another invoking function (possibly a different function/context) requests the next value to unpause it (usually passing input data to direct the paused logic interior to the coroutine's code).\nYou can think of Python coroutines as lazy incrementally-pending lists, where the next element doesn't just depend on the previous computation but also on input that you may opt to inject during the generation process.\nMinutiae\nNormally, most people would not care about the following distinctions and probably want to stop reading here.\nIn Python-speak, an iterable is any object which \"understands the concept of a for-loop\" like a list [1,2,3], and an iterator is a specific instance of the requested for-loop like [1,2,3].__iter__(). A generator is exactly the same as any iterator, except for the way it was written (with function syntax).\nWhen you request an iterator from a list, it creates a new iterator. However, when you request an iterator from an iterator (which you would rarely do), it just gives you a copy of itself.\nThus, in the unlikely event that you are failing to do something like this...\n> x = myRange(5)\n> list(x)\n[0, 1, 2, 3, 4]\n> list(x)\n[]\n... then remember that a generator is an iterator; that is, it is one-time-use. If you want to reuse it, you should call myRange(...) again. If you need to use the result twice, convert the result to a list and store it in a variable x = list(myRange(5)). Those who absolutely need to clone a generator (for example, who are doing terrifyingly hackish metaprogramming) can use itertools.tee (still works in Python 3) if absolutely necessary, since the copyable iterator Python PEP standards proposal has been deferred.\n"},{"upvotes":578,"author":"unimplemented","content":"578\nWhat does the yield keyword do in Python?\nAnswer Outline/Summary\nA function with yield, when called, returns a Generator.\nGenerators are iterators because they implement the iterator protocol, so you can iterate over them.\nA generator can also be sent information, making it conceptually a coroutine.\nIn Python 3, you can delegate from one generator to another in both directions with yield from.\n(Appendix critiques a couple of answers, including the top one, and discusses the use of return in a generator.)\nGenerators:\nyield is only legal inside of a function definition, and the inclusion of yield in a function definition makes it return a generator.\nThe idea for generators comes from other languages (see footnote 1) with varying implementations. In Python's Generators, the execution of the code is frozen at the point of the yield. When the generator is called (methods are discussed below) execution resumes and then freezes at the next yield.\nyield provides an easy way of implementing the iterator protocol, defined by the following two methods: __iter__ and __next__. 
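(A minimal hand-rolled sketch of that protocol for comparison; CountTo is a made-up example, not from the answer:\nclass CountTo:\n def __init__(self, n):\n self.i, self.n = 0, n\n def __iter__(self): # makes the object an iterable\n return self\n def __next__(self): # makes the object an iterator\n if self.i >= self.n:\n raise StopIteration # signals the for loop to stop\n self.i += 1\n return self.i - 1\nA generator function gives you all of this machinery for free.)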
Both of those methods make an object an iterator that you could type-check with the Iterator Abstract Base Class from the collections.abc module.\ndef func():\n yield 'I am'\n yield 'a generator!'\nLet's do some introspection:\n>>> type(func) # A function with yield is still a function\n<class 'function'>\n>>> gen = func()\n>>> type(gen) # but it returns a generator\n<class 'generator'>\n>>> hasattr(gen, '__iter__') # that's an iterable\nTrue\n>>> hasattr(gen, '__next__') # and with .__next__\nTrue # implements the iterator protocol.\nThe generator type is a sub-type of iterator:\nfrom types import GeneratorType\nfrom collections.abc import Iterator\n\n>>> issubclass(GeneratorType, Iterator)\nTrue\nAnd if necessary, we can type-check like this:\n>>> isinstance(gen, GeneratorType)\nTrue\n>>> isinstance(gen, Iterator)\nTrue\nA feature of an Iterator is that once exhausted, you can't reuse or reset it:\n>>> list(gen)\n['I am', 'a generator!']\n>>> list(gen)\n[]\nYou'll have to make another if you want to use its functionality again (see footnote 2):\n>>> list(func())\n['I am', 'a generator!']\nOne can yield data programmatically, for example:\ndef func(an_iterable):\n for item in an_iterable:\n yield item\nThe above simple generator is also equivalent to the below - as of Python 3.3 you can use yield from:\ndef func(an_iterable):\n yield from an_iterable\nHowever, yield from also allows for delegation to subgenerators, which will be explained in the following section on cooperative delegation with sub-coroutines.\nCoroutines:\nyield forms an expression that allows data to be sent into the generator (see footnote 3).\nHere is an example; take note of the received variable, which will point to the data that is sent to the generator:\ndef bank_account(deposited, interest_rate):\n while True:\n calculated_interest = interest_rate * deposited\n received = yield calculated_interest\n if received:\n deposited += received\n\n\n>>> my_account = bank_account(1000, .05)\nFirst, we must queue up the generator with the built-in function, next. It will call the appropriate next or __next__ method, depending on the version of Python you are using:\n>>> first_year_interest = next(my_account)\n>>> first_year_interest\n50.0\nAnd now we can send data into the generator. (Sending None is the same as calling next.):\n>>> next_year_interest = my_account.send(first_year_interest + 1000)\n>>> next_year_interest\n102.5\nCooperative Delegation to Sub-Coroutine with yield from\nNow, recall that yield from is available in Python 3.
This allows us to delegate coroutines to a subcoroutine:\ndef money_manager(expected_rate):\n # must receive deposited value from .send():\n under_management = yield # yield None to start.\n while True:\n try:\n additional_investment = yield expected_rate * under_management\n if additional_investment:\n under_management += additional_investment\n except GeneratorExit:\n '''TODO: write function to send unclaimed funds to state'''\n raise\n finally:\n '''TODO: write function to mail tax info to client'''\n \n\ndef investment_account(deposited, manager):\n '''very simple model of an investment account that delegates to a manager'''\n # must queue up manager:\n next(manager) # <- same as manager.send(None)\n # This is where we send the initial deposit to the manager:\n manager.send(deposited)\n try:\n yield from manager\n except GeneratorExit:\n return manager.close() # delegate?\nAnd now we can delegate functionality to a sub-generator and it can be used by a generator just as above:\nmy_manager = money_manager(.06)\nmy_account = investment_account(1000, my_manager)\nfirst_year_return = next(my_account) # -> 60.0\nNow simulate adding another 1,000 to the account plus the return on the account (60.0):\nnext_year_return = my_account.send(first_year_return + 1000)\nnext_year_return # 123.6\nYou can read more about the precise semantics of yield from in PEP 380.\nOther Methods: close and throw\nThe close method raises GeneratorExit at the point the function execution was frozen. This will also be called by __del__, so you can put any cleanup code where you handle the GeneratorExit:\nmy_account.close()\nYou can also throw an exception which can be handled in the generator or propagated back to the user:\nimport sys\ntry:\n raise ValueError\nexcept:\n my_manager.throw(*sys.exc_info())\nRaises:\nTraceback (most recent call last):\n File \"<stdin>\", line 4, in <module>\n File \"<stdin>\", line 6, in money_manager\n File \"<stdin>\", line 2, in <module>\nValueError\nConclusion\nI believe I have covered all aspects of the following question:\nWhat does the yield keyword do in Python?\nIt turns out that yield does a lot. I'm sure I could add even more thorough examples to this. If you want more or have some constructive criticism, let me know by commenting below.\nAppendix:\nCritique of the Top/Accepted Answer\nIt is confused about what makes an iterable, just using a list as an example. See my references above, but in summary: an iterable has an __iter__ method returning an iterator. An iterator additionally provides a .__next__ method, which is implicitly called by for loops until it raises StopIteration, and once it does raise StopIteration, it will continue to do so.\nIt then uses a generator expression to describe what a generator is. Since a generator expression is simply a convenient way to create an iterator, it only confuses the matter, and we still have not yet gotten to the yield part.\nIn Controlling a generator exhaustion he calls the .next method (which only works in Python 2), when instead he should use the built-in function, next. Calling next(obj) would be an appropriate layer of indirection, because his code does not work in Python 3.\nItertools?
This was not relevant to what yield does at all.\nNo discussion of the methods that yield provides along with the new functionality yield from in Python 3.\nThe top/accepted answer is a very incomplete answer.\nCritique of answer suggesting yield in a generator expression or comprehension.\nThe grammar currently allows any expression in a list comprehension.\nexpr_stmt: testlist_star_expr (annassign | augassign (yield_expr|testlist) |\n ('=' (yield_expr|testlist_star_expr))*)\n...\nyield_expr: 'yield' [yield_arg]\nyield_arg: 'from' test | testlist\nSince yield is an expression, it has been touted by some as interesting to use it in comprehensions or generator expression - in spite of citing no particularly good use-case.\nThe CPython core developers are discussing deprecating its allowance. Here's a relevant post from the mailing list:\nOn 30 January 2017 at 19:05, Brett Cannon wrote:\nOn Sun, 29 Jan 2017 at 16:39 Craig Rodrigues wrote:\nI'm OK with either approach. Leaving things the way they are in Python 3 is no good, IMHO.\nMy vote is it be a SyntaxError since you're not getting what you expect from the syntax.\nI'd agree that's a sensible place for us to end up, as any code relying on the current behaviour is really too clever to be maintainable.\nIn terms of getting there, we'll likely want:\nSyntaxWarning or DeprecationWarning in 3.7\nPy3k warning in 2.7.x\nSyntaxError in 3.8\nCheers, Nick.\n-- Nick Coghlan | ncoghlan at gmail.com | Brisbane, Australia\nFurther, there is an outstanding issue (10544) which seems to be pointing in the direction of this never being a good idea (PyPy, a Python implementation written in Python, is already raising syntax warnings.)\nBottom line, until the developers of CPython tell us otherwise: Don't put yield in a generator expression or comprehension.\nThe return statement in a generator\nIn Python 3:\nIn a generator function, the return statement indicates that the generator is done and will cause StopIteration to be raised. The returned value (if any) is used as an argument to construct StopIteration and becomes the StopIteration.value attribute.\nHistorical note, in Python 2: \"In a generator function, the return statement is not allowed to include an expression_list. In that context, a bare return indicates that the generator is done and will cause StopIteration to be raised.\" An expression_list is basically any number of expressions separated by commas - essentially, in Python 2, you can stop the generator with return, but you can't return a value.\nFootnotes\nThe languages CLU, Sather, and Icon were referenced in the proposal to introduce the concept of generators to Python. The general idea is that a function can maintain an internal state and yield intermediate data points on demand by the user. This promised to be superior in performance to other approaches, including Python threading, which isn't even available on some systems.\nThis means, for example, that range objects aren't Iterators, even though they are iterable, because they can be reused. Like lists, their __iter__ methods return iterator objects.\nyield was originally introduced as a statement, meaning that it could only appear at the beginning of a line in a code block. Now yield creates a yield expression. https://docs.python.org/2/reference/simple_stmts.html#grammar-token-yield_stmt This change was proposed to allow a user to send data into the generator just as one might receive it. 
To send data, one must be able to assign it to something, and for that, a statement just won't work.\n"},{"upvotes":465,"author":"unimplemented","content":"465\nyield is just like return - it returns whatever you tell it to (as a generator). The difference is that the next time you call the generator, execution starts from the last call to the yield statement. Unlike return, the stack frame is not cleaned up when a yield occurs, however control is transferred back to the caller, so its state will resume the next time the function is called.\nIn the case of your code, the function get_child_candidates is acting like an iterator so that when you extend your list, it adds one element at a time to the new list.\nlist.extend calls an iterator until it's exhausted. In the case of the code sample you posted, it would be much clearer to just return a tuple and append that to the list.\n"},{"upvotes":324,"author":"unimplemented","content":"324\nThere's one extra thing to mention: a function that yields doesn't actually have to terminate. I've written code like this:\ndef fib():\n last, cur = 0, 1\n while True: \n yield cur\n last, cur = cur, last + cur\nThen I can use it in other code like this:\nfor f in fib():\n if some_condition: break\n coolfuncs(f);\nIt really helps simplify some problems, and makes some things easier to work with.\n"},{"upvotes":314,"author":"unimplemented","content":"314\nFor those who prefer a minimal working example, meditate on this interactive Python session:\n>>> def f():\n... yield 1\n... yield 2\n... yield 3\n... \n>>> g = f()\n>>> for i in g:\n... print(i)\n... \n1\n2\n3\n>>> for i in g:\n... print(i)\n... \n>>> # Note that this time nothing was printed\n"},{"upvotes":295,"author":"unimplemented","content":"295\nTL;DR\nInstead of this:\ndef square_list(n):\n the_list = [] # Replace\n for x in range(n):\n y = x * x\n the_list.append(y) # these\n return the_list # lines\ndo this:\ndef square_yield(n):\n for x in range(n):\n y = x * x\n yield y # with this one.\nWhenever you find yourself building a list from scratch, yield each piece instead.\nThis was my first \"aha\" moment with yield.\nyield is a sugary way to say\nbuild a series of stuff\nSame behavior:\n>>> for square in square_list(4):\n... print(square)\n...\n0\n1\n4\n9\n>>> for square in square_yield(4):\n... print(square)\n...\n0\n1\n4\n9\nDifferent behavior:\nYield is single-pass: you can only iterate through once. When a function has a yield in it we call it a generator function. And an iterator is what it returns. Those terms are revealing. We lose the convenience of a container, but gain the power of a series that's computed as needed, and arbitrarily long.\nYield is lazy, it puts off computation. A function with a yield in it doesn't actually execute at all when you call it. It returns an iterator object that remembers where it left off. Each time you call next() on the iterator (this happens in a for-loop) execution inches forward to the next yield. return raises StopIteration and ends the series (this is the natural end of a for-loop).\nYield is versatile. Data doesn't have to be stored all together, it can be made available one at a time. It can be infinite.\n>>> def squares_all_of_them():\n... x = 0\n... while True:\n... yield x * x\n... x += 1\n...\n>>> squares = squares_all_of_them()\n>>> for _ in range(4):\n... 
print(next(squares))\n...\n0\n1\n4\n9\nIf you need multiple passes and the series isn't too long, just call list() on it:\n>>> list(square_yield(4))\n[0, 1, 4, 9]\nBrilliant choice of the word yield because both meanings apply:\nyield — produce or provide (as in agriculture)\n...provide the next data in the series.\nyield — give way or relinquish (as in political power)\n...relinquish CPU execution until the iterator advances.\n"},{"upvotes":250,"author":"unimplemented","content":"250\nIt's returning a generator. I'm not particularly familiar with Python, but I believe it's the same kind of thing as C#'s iterator blocks if you're familiar with those.\nThe key idea is that the compiler/interpreter/whatever does some trickery so that as far as the caller is concerned, they can keep calling next() and it will keep returning values - as if the generator method was paused. Now obviously you can't really \"pause\" a method, so the compiler builds a state machine for you to remember where you currently are and what the local variables etc look like. This is much easier than writing an iterator yourself.\n"},{"upvotes":250,"author":"unimplemented","content":"250\nYield gives you a generator.\ndef get_odd_numbers(i):\n return range(1, i, 2)\ndef yield_odd_numbers(i):\n for x in range(1, i, 2):\n yield x\nfoo = get_odd_numbers(10)\nbar = yield_odd_numbers(10)\nfoo\n[1, 3, 5, 7, 9]\nbar\n<generator object yield_odd_numbers at 0x...>\nbar.next()\n1\nbar.next()\n3\nbar.next()\n5\nAs you can see, in the first case foo holds the entire list in memory at once. It's not a big deal for a list with 5 elements, but what if you want a list of 5 million? Not only is this a huge memory eater, it also costs a lot of time to build at the time that the function is called.\nIn the second case, bar just gives you a generator. A generator is an iterable--which means you can use it in a for loop, etc, but each value can only be accessed once. All the values are also not stored in memory at the same time; the generator object \"remembers\" where it was in the looping the last time you called it--this way, if you're using an iterable to (say) count to 50 billion, you don't have to count to 50 billion all at once and store the 50 billion numbers to count through.\nAgain, this is a pretty contrived example; you probably would use itertools if you really wanted to count to 50 billion. :)\nThis is the most simple use case of generators. As you said, it can be used to write efficient permutations, using yield to push things up through the call stack instead of using some sort of stack variable. Generators can also be used for specialized tree traversal, and all manner of other things.\n"},{"upvotes":224,"author":"unimplemented","content":"224\nHere is an example in plain language. I will provide a correspondence between high-level human concepts and low-level Python concepts.\nI want to operate on a sequence of numbers, but I don't want to bother myself with the creation of that sequence; I want only to focus on the operation I want to do. So, I do the following:\nI call you and tell you that I want a sequence of numbers that are calculated in a specific way, and I let you know what the algorithm is.\nThis step corresponds to defining the generator function, i.e. the function containing a yield.\nSometime later, I tell you, \"OK, get ready to tell me the sequence of numbers\".\nThis step corresponds to calling the generator function which returns a generator object.
Note that you don't tell me any numbers yet; you just grab your paper and pencil.\nI ask you, \"Tell me the next number\", and you tell me the first number; after that, you wait for me to ask you for the next number. It's your job to remember where you were, what numbers you have already said, and what is the next number. I don't care about the details.\nThis step corresponds to calling next(generator) on the generator object.\n(In Python 2, .next was a method of the generator object; in Python 3, it is named .__next__, but the proper way to call it is using the builtin next() function just like len() and .__len__)\n… repeat the previous step, until…\neventually, you might come to an end. You don't tell me a number; you just shout, \"Hold your horses! I'm done! No more numbers!\"\nThis step corresponds to the generator object ending its job, and raising a StopIteration exception.\nThe generator function does not need to raise the exception. It's raised automatically when the function ends or issues a return.\nThis is what a generator does (a function that contains a yield); it starts executing on the first next(), pauses whenever it does a yield, and when asked for the next() value it continues from the point it was last. It fits perfectly by design with the iterator protocol of Python, which describes how to sequentially request values.\nThe most famous user of the iterator protocol is the for command in Python. So, whenever you do a:\nfor item in sequence:\nit doesn't matter if sequence is a list, a string, a dictionary or a generator object like described above; the result is the same: you read items off a sequence one by one.\nNote that defining a function that contains a yield keyword is not the only way to create a generator; it's just the easiest way to create one.\nFor more accurate information, read about iterator types, the yield statement, and generators in the Python documentation.\n"},{"upvotes":221,"author":"unimplemented","content":"221\nThere is one type of answer that I don't feel has been given yet, among the many great answers that describe how to use generators. Here is the programming language theory answer:\nThe yield statement in Python returns a generator. A generator in Python is a function that returns continuations (and specifically a type of coroutine, but continuations represent the more general mechanism to understand what is going on).\nContinuations in programming languages theory are a much more fundamental kind of computation, but they are not often used, because they are extremely hard to reason about and also very difficult to implement. But the idea of what a continuation is, is straightforward: it is the state of a computation that has not yet finished. In this state, the current values of variables, the operations that have yet to be performed, and so on, are saved. Then at some point later in the program the continuation can be invoked, such that the program's variables are reset to that state and the operations that were saved are carried out.\nContinuations, in this more general form, can be implemented in two ways. In the call/cc way, the program's stack is literally saved and then when the continuation is invoked, the stack is restored.\nIn continuation passing style (CPS), continuations are just normal functions (only in languages where functions are first class) which the programmer explicitly manages and passes around to subroutines. 
In this style, program state is represented by closures (and the variables that happen to be encoded in them) rather than variables that reside somewhere on the stack. Functions that manage control flow accept continuations as arguments (in some variations of CPS, functions may accept multiple continuations) and manipulate control flow simply by calling them and returning afterwards. A very simple example of continuation passing style is as follows:\ndef save_file(filename):\n def write_file_continuation():\n write_stuff_to_file(filename)\n\n check_if_file_exists_and_user_wants_to_overwrite(write_file_continuation)\nIn this (very simplistic) example, the programmer saves the operation of actually writing the file into a continuation (which can potentially be a very complex operation with many details to write out), and then passes that continuation (i.e., as a first-class closure) to another operator which does some more processing, and then calls it if necessary. (I use this design pattern a lot in actual GUI programming, either because it saves me lines of code or, more importantly, to manage control flow after GUI events trigger.)\nThe rest of this post will, without loss of generality, conceptualize continuations as CPS, because it is a hell of a lot easier to understand and read.\n\nNow let's talk about generators in Python. Generators are a specific subtype of continuation. Whereas continuations are able in general to save the state of a computation (i.e., the program's call stack), generators are only able to save the state of iteration over an iterator. That said, this definition is slightly misleading for certain use cases of generators. For instance:\ndef f():\n while True:\n yield 4\nThis is clearly a reasonable iterable whose behavior is well defined -- each time the generator iterates over it, it returns 4 (and does so forever). But it probably isn't the prototypical type of iterable that comes to mind when thinking of iterators (i.e., for x in collection: do_something(x)). This example illustrates the power of generators: if anything is an iterator, a generator can save the state of its iteration.\nTo reiterate: Continuations can save the state of a program's stack and generators can save the state of iteration. This means that continuations are a lot more powerful than generators, but also that generators are a lot, lot easier. They are easier for the language designer to implement, and they are easier for the programmer to use (if you have some time to burn, try to read and understand this page about continuations and call/cc).\nBut you could easily implement (and conceptualize) generators as a simple, specific case of continuation passing style:\nWhenever yield is called, it tells the function to return a continuation. When the function is called again, it starts from wherever it left off. 
So, in pseudo-pseudocode (i.e., not pseudocode, but not code) the generator's next method is basically as follows:\nclass Generator():\n def __init__(self,iterable,generatorfun):\n self.next_continuation = lambda:generatorfun(iterable)\n\n def next(self):\n value, next_continuation = self.next_continuation()\n self.next_continuation = next_continuation\n return value\nwhere the yield keyword is actually syntactic sugar for the real generator function, basically something like:\ndef generatorfun(iterable):\n if len(iterable) == 0:\n raise StopIteration\n else:\n return (iterable[0], lambda:generatorfun(iterable[1:]))\nRemember that this is just pseudocode and the actual implementation of generators in Python is more complex. But as an exercise to understand what is going on, try to use continuation passing style to implement generator objects without use of the yield keyword.\n"},{"upvotes":173,"author":"unimplemented","content":"173\nWhile a lot of answers show why you'd use a yield to create a generator, there are more uses for yield. It's quite easy to make a coroutine, which enables the passing of information between two blocks of code. I won't repeat any of the fine examples that have already been given about using yield to create a generator.\nTo help understand what a yield does in the following code, you can use your finger to trace the cycle through any code that has a yield. Every time your finger hits the yield, you have to wait for a next or a send to be entered. When a next is called, you trace through the code until you hit the yield… the code on the right of the yield is evaluated and returned to the caller… then you wait. When next is called again, you perform another loop through the code. However, you'll note that in a coroutine, yield can also be used with a send… which will send a value from the caller into the yielding function. If a send is given, then yield receives the value sent, and spits it out the left hand side… then the trace through the code progresses until you hit the yield again (returning the value at the end, as if next was called).\nFor example:\n>>> def coroutine():\n... i = -1\n... while True:\n... i += 1\n... val = (yield i)\n... print(\"Received %s\" % val)\n...\n>>> sequence = coroutine()\n>>> sequence.next()\n0\n>>> sequence.next()\nReceived None\n1\n>>> sequence.send('hello')\nReceived hello\n2\n>>> sequence.close()\n"},{"upvotes":169,"author":"unimplemented","content":"169\nThere is another yield use and meaning (since Python 3.3):\nyield from \nFrom PEP 380 -- Syntax for Delegating to a Subgenerator:\nA syntax is proposed for a generator to delegate part of its operations to another generator. This allows a section of code containing 'yield' to be factored out and placed in another generator. Additionally, the subgenerator is allowed to return with a value, and the value is made available to the delegating generator.\nThe new syntax also opens up some opportunities for optimisation when one generator re-yields values produced by another.\nMoreover this will introduce (since Python 3.5):\nasync def new_coroutine(data):\n ...\n await blocking_action()\nto avoid coroutines being confused with a regular generator (today yield is used in both).\n"},{"upvotes":157,"author":"unimplemented","content":"157\nAll great answers, however a bit difficult for newbies.\nI assume you have learned the return statement.\nAs an analogy, return and yield are twins. 
return means 'return and stop' whereas yield means 'return, but continue'.\nTry to get a num_list with return.\ndef num_list(n):\n for i in range(n):\n return i\nRun it:\nIn [5]: num_list(3)\nOut[5]: 0\nSee, you get only a single number rather than a list of them. return only ever runs once, and then it quits.\nHere comes yield.\nReplace return with yield:\nIn [10]: def num_list(n):\n ...: for i in range(n):\n ...: yield i\n ...:\n\nIn [11]: num_list(3)\nOut[11]: <generator object num_list at 0x...>\n\nIn [12]: list(num_list(3))\nOut[12]: [0, 1, 2]\nNow you get all the numbers.\nCompared to return, which runs once and stops, yield runs as many times as you planned. You can interpret return as return one of them, and yield as return all of them. This is called an iterable.\nOne more step: we can rewrite the yield version with return\nIn [15]: def num_list(n):\n ...: result = []\n ...: for i in range(n):\n ...: result.append(i)\n ...: return result\n\nIn [16]: num_list(3)\nOut[16]: [0, 1, 2]\nThat's the core idea of yield.\nThe difference between what return outputs (a list) and what yield outputs (an object) is:\nYou will always get [0, 1, 2] from the list object, but you can only retrieve the values from the yielded object once. That is why it gets a new name, generator object, as displayed in Out[11].\nIn conclusion, as a metaphor to grok it:\nreturn and yield are twins\nlist and generator are twins\n"},{"upvotes":145,"author":"unimplemented","content":"145\nFrom a programming viewpoint, the iterators are implemented as thunks.\nTo implement concepts such as iterators, generators, concurrent execution via messages, etc., one uses messages sent to a closure object, which has a dispatcher, and the dispatcher answers to \"messages\" (this concept comes from Simula and is the central part of Smalltalk).\n\"next\" is a message sent to a closure, created by the \"iter\" call.\nThere are lots of ways to implement this computation. I used mutation, but it is possible to do this kind of computation without mutation, by returning the current value and the next yielder (making it referentially transparent). Racket uses a sequence of transformations of the initial program into some intermediary languages; one such rewriting transforms the yield operator into a language with simpler operators.\nHere is a demonstration of how yield could be rewritten, which uses the structure of R6RS, but the semantics are identical to Python's. It's the same model of computation, and only a change in syntax is required to rewrite it using Python's yield.\nWelcome to Racket v6.5.0.3.\n\n-> (define gen\n (lambda (l)\n (define yield\n (lambda ()\n (if (null? l)\n 'END\n (let ((v (car l)))\n (set! l (cdr l))\n v))))\n (lambda(m)\n (case m\n ('yield (yield))\n ('init (lambda (data)\n (set!
l data)\n 'OK))))))\n-> (define stream (gen '(1 2 3)))\n-> (stream 'yield)\n1\n-> (stream 'yield)\n2\n-> (stream 'yield)\n3\n-> (stream 'yield)\n'END\n-> ((stream 'init) '(a b))\n'OK\n-> (stream 'yield)\n'a\n-> (stream 'yield)\n'b\n-> (stream 'yield)\n'END\n-> (stream 'yield)\n'END\n->\n"},{"upvotes":136,"author":"unimplemented","content":"136\nHere are some Python examples of how to actually implement generators as if Python did not provide syntactic sugar for them:\nAs a Python generator:\nfrom itertools import islice\n\ndef fib_gen():\n a, b = 1, 1\n while True:\n yield a\n a, b = b, a + b\n\nassert [1, 1, 2, 3, 5] == list(islice(fib_gen(), 5))\nUsing lexical closures instead of generators\ndef ftake(fnext, last):\n return [fnext() for _ in xrange(last)]\n\ndef fib_gen2():\n #funky scope due to python2.x workaround\n #for python 3.x use nonlocal\n def _():\n _.a, _.b = _.b, _.a + _.b\n return _.a\n _.a, _.b = 0, 1\n return _\n\nassert [1,1,2,3,5] == ftake(fib_gen2(), 5)\nUsing object closures instead of generators (because ClosuresAndObjectsAreEquivalent)\nclass fib_gen3:\n def __init__(self):\n self.a, self.b = 1, 1\n\n def __call__(self):\n r = self.a\n self.a, self.b = self.b, self.a + self.b\n return r\n\nassert [1,1,2,3,5] == ftake(fib_gen3(), 5)\n"},{"upvotes":125,"author":"unimplemented","content":"125\nI was going to post \"read page 19 of Beazley's 'Python: Essential Reference' for a quick description of generators\", but so many others have posted good descriptions already.\nAlso, note that yield can be used in coroutines as the dual of their use in generator functions. Although it isn't the same use as your code snippet, (yield) can be used as an expression in a function. When a caller sends a value to the method using the send() method, then the coroutine will execute until the next (yield) statement is encountered.\nGenerators and coroutines are a cool way to set up data-flow type applications. I thought it would be worthwhile knowing about the other use of the yield statement in functions.\n"},{"upvotes":107,"author":"unimplemented","content":"107\nHere is a simple example:\ndef isPrimeNumber(n):\n print \"isPrimeNumber({}) call\".format(n)\n if n==1:\n return False\n for x in range(2,n):\n if n % x == 0:\n return False\n return True\n\ndef primes (n=1):\n while(True):\n print \"loop step ---------------- {}\".format(n)\n if isPrimeNumber(n): yield n\n n += 1\n\nfor n in primes():\n if n> 10:break\n print \"writing result {}\".format(n)\nOutput:\nloop step ---------------- 1\nisPrimeNumber(1) call\nloop step ---------------- 2\nisPrimeNumber(2) call\nloop step ---------------- 3\nisPrimeNumber(3) call\nwriting result 3\nloop step ---------------- 4\nisPrimeNumber(4) call\nloop step ---------------- 5\nisPrimeNumber(5) call\nwriting result 5\nloop step ---------------- 6\nisPrimeNumber(6) call\nloop step ---------------- 7\nisPrimeNumber(7) call\nwriting result 7\nloop step ---------------- 8\nisPrimeNumber(8) call\nloop step ---------------- 9\nisPrimeNumber(9) call\nloop step ---------------- 10\nisPrimeNumber(10) call\nloop step ---------------- 11\nisPrimeNumber(11) call\nI am not a Python developer, but it looks to me yield holds the position of program flow and the next loop start from \"yield\" position. 
It seems like it is waiting at that position, and just before that, returning a value outside, and next time continues to work.\nIt seems to be an interesting and nice ability :D\n"},{"upvotes":92,"author":"unimplemented","content":"92\nHere is a mental image of what yield does.\nI like to think of a thread as having a stack (even when it's not implemented that way).\nWhen a normal function is called, it puts its local variables on the stack, does some computation, then clears the stack and returns. The values of its local variables are never seen again.\nWith a yield function, when its code begins to run (i.e. after the function is called, returning a generator object, whose next() method is then invoked), it similarly puts its local variables onto the stack and computes for a while. But then, when it hits the yield statement, before clearing its part of the stack and returning, it takes a snapshot of its local variables and stores them in the generator object. It also writes down the place where it's currently up to in its code (i.e. the particular yield statement).\nSo it's a kind of a frozen function that the generator is hanging onto.\nWhen next() is called subsequently, it retrieves the function's belongings onto the stack and re-animates it. The function continues to compute from where it left off, oblivious to the fact that it had just spent an eternity in cold storage.\nCompare the following examples:\ndef normalFunction():\n return\n if False:\n pass\n\ndef yielderFunction():\n return\n if False:\n yield 12\nWhen we call the second function, it behaves very differently to the first. The yield statement might be unreachable, but if it's present anywhere, it changes the nature of what we're dealing with.\n>>> yielderFunction()\n\nCalling yielderFunction() doesn't run its code, but makes a generator out of the code. (Maybe it's a good idea to name such things with the yielder prefix for readability.)\n>>> gen = yielderFunction()\n>>> dir(gen)\n['__class__',\n ...\n '__iter__', #Returns gen itself, to make it work uniformly with containers\n ... #when given to a for loop. (Containers return an iterator instead.)\n 'close',\n 'gi_code',\n 'gi_frame',\n 'gi_running',\n 'next', #The method that runs the function's body.\n 'send',\n 'throw']\nThe gi_code and gi_frame fields are where the frozen state is stored. Exploring them with dir(..), we can confirm that our mental model above is credible.\n"},{"upvotes":87,"author":"unimplemented","content":"87\nImagine that you have created a remarkable machine that is capable of generating thousands and thousands of lightbulbs per day. The machine generates these lightbulbs in boxes with a unique serial number. You don't have enough space to store all of these lightbulbs at the same time, so you would like to adjust it to generate lightbulbs on-demand.\nPython generators don't differ much from this concept. Imagine that you have a function called barcode_generator that generates unique serial numbers for the boxes. Obviously, you can have a huge number of such barcodes returned by the function, subject to the hardware (RAM) limitations. A wiser, and space efficient, option is to generate those serial numbers on-demand.\nMachine's code:\ndef barcode_generator():\n serial_number = 10000 # Initial barcode\n while True:\n yield serial_number\n serial_number += 1\n\n\nbarcode = barcode_generator()\nwhile True:\n number_of_lightbulbs_to_generate = int(input(\"How many lightbulbs to generate? 
\"))\n barcodes = [next(barcode) for _ in range(number_of_lightbulbs_to_generate)]\n print(barcodes)\n\n # function_to_create_the_next_batch_of_lightbulbs(barcodes)\n\n produce_more = input(\"Produce more? [Y/n]: \")\n if produce_more == \"n\":\n break\nNote the next(barcode) bit.\nAs you can see, we have a self-contained “function” to generate the next unique serial number each time. This function returns a generator! As you can see, we are not calling the function each time we need a new serial number, but instead we are using next() given the generator to obtain the next serial number.\nLazy Iterators\nTo be more precise, this generator is a lazy iterator! An iterator is an object that helps us traverse a sequence of objects. It's called lazy because it does not load all the items of the sequence in memory until they are needed. The use of next in the previous example is the explicit way to obtain the next item from the iterator. The implicit way is using for loops:\nfor barcode in barcode_generator():\n print(barcode)\nThis will print barcodes infinitely, yet you will not run out of memory.\nIn other words, a generator looks like a function but behaves like an iterator.\nReal-world application?\nFinally, real-world applications? They are usually useful when you work with big sequences. Imagine reading a huge file from disk with billions of records. Reading the entire file in memory, before you can work with its content, will probably be infeasible (i.e., you will run out of memory).\n"},{"upvotes":83,"author":"unimplemented","content":"83\nAn easy example to understand what it is: yield\ndef f123():\n for _ in range(4):\n yield 1\n yield 2\n\n\nfor i in f123():\n print (i)\nThe output is:\n1 2 1 2 1 2 1 2\n"},{"upvotes":79,"author":"unimplemented","content":"79\nLike every answer suggests, yield is used for creating a sequence generator. It's used for generating some sequence dynamically. For example, while reading a file line by line on a network, you can use the yield function as follows:\ndef getNextLines():\n while con.isOpen():\n yield con.read()\nYou can use it in your code as follows:\nfor line in getNextLines():\n doSomeThing(line)\nExecution Control Transfer gotcha\nThe execution control will be transferred from getNextLines() to the for loop when yield is executed. Thus, every time getNextLines() is invoked, execution begins from the point where it was paused last time.\nThus in short, a function with the following code\ndef simpleYield():\n yield \"first time\"\n yield \"second time\"\n yield \"third time\"\n yield \"Now some useful value {}\".format(12)\n\nfor i in simpleYield():\n print i\nwill print\n\"first time\"\n\"second time\"\n\"third time\"\n\"Now some useful value 12\"\n"},{"upvotes":77,"author":"unimplemented","content":"77\n(My below answer only speaks from the perspective of using Python generator, not the underlying implementation of generator mechanism, which involves some tricks of stack and heap manipulation.)\nWhen yield is used instead of a return in a python function, that function is turned into something special called generator function. That function will return an object of generator type. The yield keyword is a flag to notify the python compiler to treat such function specially. Normal functions will terminate once some value is returned from it. But with the help of the compiler, the generator function can be thought of as resumable. That is, the execution context will be restored and the execution will continue from last run. 
Until you explicitly call return, which will raise a StopIteration exception (which is also part of the iterator protocol), or reach the end of the function. I found a lot of references about generator but this one from the functional programming perspective is the most digestable.\n(Now I want to talk about the rationale behind generator, and the iterator based on my own understanding. I hope this can help you grasp the essential motivation of iterator and generator. Such concept shows up in other languages as well such as C#.)\nAs I understand, when we want to process a bunch of data, we usually first store the data somewhere and then process it one by one. But this naive approach is problematic. If the data volume is huge, it's expensive to store them as a whole beforehand. So instead of storing the data itself directly, why not store some kind of metadata indirectly, i.e. the logic how the data is computed.\nThere are 2 approaches to wrap such metadata.\nThe OO approach, we wrap the metadata as a class. This is the so-called iterator who implements the iterator protocol (i.e. the __next__(), and __iter__() methods). This is also the commonly seen iterator design pattern.\nThe functional approach, we wrap the metadata as a function. This is the so-called generator function. But under the hood, the returned generator object still IS-A iterator because it also implements the iterator protocol.\nEither way, an iterator is created, i.e. some object that can give you the data you want. The OO approach may be a bit complex. Anyway, which one to use is up to you.\n"},{"upvotes":76,"author":"unimplemented","content":"76\nIn summary, the yield statement transforms your function into a factory that produces a special object called a generator which wraps around the body of your original function. When the generator is iterated, it executes your function until it reaches the next yield then suspends execution and evaluates to the value passed to yield. It repeats this process on each iteration until the path of execution exits the function. For instance,\ndef simple_generator():\n yield 'one'\n yield 'two'\n yield 'three'\n\nfor i in simple_generator():\n print i\nsimply outputs\none\ntwo\nthree\nThe power comes from using the generator with a loop that calculates a sequence, the generator executes the loop stopping each time to 'yield' the next result of the calculation, in this way it calculates a list on the fly, the benefit being the memory saved for especially large calculations\nSay you wanted to create a your own range function that produces an iterable range of numbers, you could do it like so,\ndef myRangeNaive(i):\n n = 0\n range = []\n while n < i:\n range.append(n)\n n = n + 1\n return range\nand use it like this;\nfor i in myRangeNaive(10):\n print i\nBut this is inefficient because\nYou create an array that you only use once (this wastes memory)\nThis code actually loops over that array twice! :(\nLuckily Guido and his team were generous enough to develop generators so we could just do this;\ndef myRangeSmart(i):\n n = 0\n while n < i:\n yield n\n n = n + 1\n return\n\nfor i in myRangeSmart(10):\n print i\nNow upon each iteration a function on the generator called next() executes the function until it either reaches a 'yield' statement in which it stops and 'yields' the value or reaches the end of the function. 
In this case on the first call, next() executes up to the yield statement and yield 'n', on the next call it will execute the increment statement, jump back to the 'while', evaluate it, and if true, it will stop and yield 'n' again, it will continue that way until the while condition returns false and the generator jumps to the end of the function.\n"},{"upvotes":71,"author":"unimplemented","content":"71\nYield is an object\nA return in a function will return a single value.\nIf you want a function to return a huge set of values, use yield.\nMore importantly, yield is a barrier.\nlike barrier in the CUDA language, it will not transfer control until it gets completed.\nThat is, it will run the code in your function from the beginning until it hits yield. Then, it’ll return the first value of the loop.\nThen, every other call will run the loop you have written in the function one more time, returning the next value until there isn't any value to return.\n"},{"upvotes":69,"author":"unimplemented","content":"69\nMany people use return rather than yield, but in some cases yield can be more efficient and easier to work with.\nHere is an example which yield is definitely best for:\nreturn (in function)\nimport random\n\ndef return_dates():\n dates = [] # With 'return' you need to create a list then return it\n for i in range(5):\n date = random.choice([\"1st\", \"2nd\", \"3rd\", \"4th\", \"5th\", \"6th\", \"7th\", \"8th\", \"9th\", \"10th\"])\n dates.append(date)\n return dates\nyield (in function)\ndef yield_dates():\n for i in range(5):\n date = random.choice([\"1st\", \"2nd\", \"3rd\", \"4th\", \"5th\", \"6th\", \"7th\", \"8th\", \"9th\", \"10th\"])\n yield date # 'yield' makes a generator automatically which works\n # in a similar way. This is much more efficient.\nCalling functions\ndates_list = return_dates()\nprint(dates_list)\nfor i in dates_list:\n print(i)\n\ndates_generator = yield_dates()\nprint(dates_generator)\nfor i in dates_generator:\n print(i)\nBoth functions do the same thing, but yield uses three lines instead of five and has one less variable to worry about.\nThis is the result from the code:\nAs you can see both functions do the same thing. The only difference is return_dates() gives a list and yield_dates() gives a generator.\nA real life example would be something like reading a file line by line or if you just want to make a generator.\n"},{"upvotes":59,"author":"unimplemented","content":"59\nThe yield keyword simply collects returning results. Think of yield like return +=\n"},{"upvotes":56,"author":"unimplemented","content":"56\nyield is like a return element for a function. The difference is, that the yield element turns a function into a generator. A generator behaves just like a function until something is 'yielded'. The generator stops until it is next called, and continues from exactly the same point as it started. 
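For instance, here is a minimal sketch of that pause-and-resume behaviour (the function name is illustrative):\ndef two_steps():\n print('running up to the first yield')\n yield 1\n print('resumed after the first yield')\n yield 2\n\ng = two_steps()\nnext(g) # prints 'running up to the first yield' and returns 1\nnext(g) # prints 'resumed after the first yield' and returns 2\n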
You can get a sequence of all the 'yielded' values in one go by calling list() on the generator, i.e. list(generator()).\n"},{"upvotes":16747,"author":"unimplemented","content":"16747\nFind the index of the array element you want to remove using indexOf, and then remove that index with splice.\nThe splice() method changes the contents of an array by removing existing elements and/or adding new elements.\nconst array = [2, 5, 9];\n\nconsole.log(array);\n\nconst index = array.indexOf(5);\nif (index > -1) { // only splice array when item is found\n array.splice(index, 1); // 2nd parameter means remove one item only\n}\n\n// array = [2, 9]\nconsole.log(array); \nThe second parameter of splice is the number of elements to remove. Note that splice modifies the array in place and returns a new array containing the elements that have been removed.\nFor the sake of completeness, here are two helper functions. The first function removes only a single occurrence (i.e. removing the first match of 5 from [2,5,9,1,5,8,5]), while the second function removes all occurrences:\nfunction removeItemOnce(arr, value) {\n var index = arr.indexOf(value);\n if (index > -1) {\n arr.splice(index, 1);\n }\n return arr;\n}\n\nfunction removeItemAll(arr, value) {\n var i = 0;\n while (i < arr.length) {\n if (arr[i] === value) {\n arr.splice(i, 1);\n } else {\n ++i;\n }\n }\n return arr;\n}\n// Usage\nconsole.log(removeItemOnce([2,5,9,1,5,8,5], 5))\nconsole.log(removeItemAll([2,5,9,1,5,8,5], 5))\nIn TypeScript, these functions can stay type-safe with a type parameter:\nfunction removeItem<T>(arr: Array<T>, value: T): Array<T> {\n const index = arr.indexOf(value);\n if (index > -1) {\n arr.splice(index, 1);\n }\n return arr;\n}
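For instance, a quick usage sketch of the typed helper above (the sample values are illustrative):\nconst nums = [2, 5, 9];\nremoveItem(nums, 5); // nums is now [2, 9]\nremoveItem(nums, 42); // no match found, so nums stays [2, 9]\n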
"},{"upvotes":2541,"author":"unimplemented","content":"2541\nEdited in October 2016\nDo it simple, intuitive and explicit (Occam's razor)\nDo it immutable (the original array stays unchanged)\nDo it with standard JavaScript functions, if your browser doesn't support them - use a polyfill\nIn this code example I use the array.filter(...) function to remove unwanted items from an array. This function doesn't change the original array and creates a new one. If your browser doesn't support this function (e.g. Internet Explorer before version 9, or Firefox before version 1.5), consider polyfilling with core-js.\nBe mindful though, creating a new array every time takes a big performance hit. If the list is very large (think 10k+ items) then consider using other methods.\nRemoving an item (ECMA-262 Edition 5 code, AKA old-style JavaScript)\nvar value = 3\n\nvar arr = [1, 2, 3, 4, 5, 3]\n\narr = arr.filter(function(item) {\n return item !== value\n})\n\nconsole.log(arr)\n// [ 1, 2, 4, 5 ]\nRemoving an item (ECMAScript 6 code)\nlet value = 3\n\nlet arr = [1, 2, 3, 4, 5, 3]\n\narr = arr.filter(item => item !== value)\n\nconsole.log(arr)\n// [ 1, 2, 4, 5 ]\nIMPORTANT: The ECMAScript 6 () => {} arrow function syntax is not supported in Internet Explorer at all, Chrome before version 45, Firefox before version 22, and Safari before version 10. To use ECMAScript 6 syntax in old browsers you can use BabelJS.\nRemoving multiple items (ECMAScript 7 code)\nAn additional advantage of this method is that you can remove multiple items\nlet forDeletion = [2, 3, 5]\n\nlet arr = [1, 2, 3, 4, 5, 3]\n\narr = arr.filter(item => !forDeletion.includes(item))\n// !!! Read below about array.includes(...) support !!!\n\nconsole.log(arr)\n// [ 1, 4 ]\nIMPORTANT: The array.includes(...) function is not supported in Internet Explorer at all, Chrome before version 47, Firefox before version 43, Safari before version 9, and Edge before version 14, but you can polyfill it with core-js.\nRemoving multiple items (in the future, maybe)\nIf the \"This-Binding Syntax\" proposal is ever accepted, you'll be able to do this:\n// array-lib.js\n\nexport function remove(...forDeletion) {\n return this.filter(item => !forDeletion.includes(item))\n}\n\n// main.js\n\nimport { remove } from './array-lib.js'\n\nlet arr = [1, 2, 3, 4, 5, 3]\n\n// :: This-Binding Syntax Proposal\n// using \"remove\" function as \"virtual method\"\n// without extending Array.prototype\narr = arr::remove(2, 3, 5)\n\nconsole.log(arr)\n// [ 1, 4 ]\nTry it yourself in BabelJS :)\nReference\nArray.prototype.includes\nFunctional composition\n"},{"upvotes":1777,"author":"unimplemented","content":"1777\nI don't know how you are expecting array.remove(int) to behave. There are three possibilities I can think of that you might want.\nTo remove an element of an array at an index i:\narray.splice(i, 1);\nIf you want to remove every element with value number from the array:\nfor (var i = array.length - 1; i >= 0; i--) {\n if (array[i] === number) {\n array.splice(i, 1);\n }\n}\nIf you just want to make the element at index i no longer exist, but you don't want the indexes of the other elements to change:\ndelete array[i];\n"},{"upvotes":680,"author":"unimplemented","content":"680\nIt depends on whether you want to keep an empty spot or not.\nIf you do want an empty slot:\narray[index] = undefined;\nIf you don't want an empty slot:\n//To keep the original:\n//oldArray = [...array];\n\n//This modifies the array.\narray.splice(index, 1);\nAnd if you need the value of that item, you can just store the returned array's element:\nvar value = array.splice(index, 1)[0];\nIf you want to remove at either end of the array, you can use array.pop() for the last one or array.shift() for the first one (both return the value of the item as well).\nIf you don't know the index of the item, you can use array.indexOf(item) to get it (in an if() to get one item or in a while() to get all of them). array.indexOf(item) returns either the index or -1 if not found. \n"},{"upvotes":426,"author":"unimplemented","content":"426\nA friend was having issues in Internet Explorer 8 and showed me what he did. I told him it was wrong, and he told me he got the answer here. The current top answer will not work in all browsers (Internet Explorer 8 for example), and it will only remove the first occurrence of the item.\nRemove ALL instances from an array\nfunction removeAllInstances(arr, item) {\n for (var i = arr.length; i--;) {\n if (arr[i] === item) arr.splice(i, 1);\n }\n}\nIt loops through the array backwards (since indices and length will change as items are removed) and removes the item if it's found. It works in all browsers.\n"},{"upvotes":352,"author":"unimplemented","content":"352\nThere are two major approaches\nsplice(): anArray.splice(index, 1);\n let fruits = ['Apple', 'Banana', 'Mango', 'Orange']\n let removed = fruits.splice(2, 1);\n // fruits is ['Apple', 'Banana', 'Orange']\n // removed is ['Mango']\ndelete: delete anArray[index];\n let fruits = ['Apple', 'Banana', 'Mango', 'Orange']\n let removed = delete fruits[2];\n // fruits is ['Apple', 'Banana', undefined, 'Orange']\n // removed is true\nBe careful when you use delete for an array. It is good for deleting attributes of objects, but not so good for arrays. 
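For instance, a quick sketch of the hole that delete leaves behind (the sample values are illustrative):\nlet letters = ['a', 'b', 'c', 'd'];\ndelete letters[2];\nconsole.log(letters.length); // still 4\nconsole.log(letters[2]); // undefined - the slot is empty, not removed\n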
It is better to use splice for arrays.\nKeep in mind that when you use delete for an array you could get wrong results for anArray.length. In other words, delete would remove the element, but it wouldn't update the value of the length property.\nYou can also expect to have holes in index numbers after using delete, e.g. you could end up with having indexes 1, 3, 4, 8, 9, and 11 and length as it was before using delete. In that case, all indexed for loops would crash, since indexes are no longer sequential.\nIf you are forced to use delete for some reason, then you should use for each loops when you need to loop through arrays. As the matter of fact, always avoid using indexed for loops, if possible. That way the code would be more robust and less prone to problems with indexes.\n"},{"upvotes":283,"author":"unimplemented","content":"283\nArray.prototype.removeByValue = function (val) {\n for (var i = 0; i < this.length; i++) {\n if (this[i] === val) {\n this.splice(i, 1);\n i--;\n }\n }\n return this;\n}\n\nvar fruits = ['apple', 'banana', 'carrot', 'orange'];\nfruits.removeByValue('banana');\n\nconsole.log(fruits);\n// -> ['apple', 'carrot', 'orange']\n"},{"upvotes":207,"author":"unimplemented","content":"207\nThere isn't any need to use indexOf or splice. However, it performs better if you only want to remove one occurrence of an element.\nFind and move (move):\nfunction move(arr, val) {\n var j = 0;\n for (var i = 0, l = arr.length; i < l; i++) {\n if (arr[i] !== val) {\n arr[j++] = arr[i];\n }\n }\n arr.length = j;\n}\nUse indexOf and splice (indexof):\nfunction indexof(arr, val) {\n var i;\n while ((i = arr.indexOf(val)) != -1) {\n arr.splice(i, 1);\n }\n}\nUse only splice (splice):\nfunction splice(arr, val) {\n for (var i = arr.length; i--;) {\n if (arr[i] === val) {\n arr.splice(i, 1);\n }\n }\n}\nRun-times on Node.js for an array with 1000 elements (averaged over 10,000 runs):\nindexof is approximately 10 times slower than move. Even if improved by removing the call to indexOf in splice, it performs much worse than move.\nRemove all occurrences:\n move 0.0048 ms\n indexof 0.0463 ms\n splice 0.0359 ms\n\nRemove first occurrence:\n move_one 0.0041 ms\n indexof_one 0.0021 ms\n"},{"upvotes":201,"author":"unimplemented","content":"201\nfilter is an elegant way to accomplish this without mutating the original array\nconst num = 3;\nlet arr = [1, 2, 3, 4];\nconst arr2 = arr.filter(x => x !== num);\nconsole.log(arr); // [1, 2, 3, 4]\nconsole.log(arr2); // [1, 2, 4]\nYou can use filter and then assign the result to the original array if you want to achieve a mutation removal behaviour.\nconst num = 3;\nlet arr = [1, 2, 3, 4];\narr = arr.filter(x => x !== num);\nconsole.log(arr); // [1, 2, 4]\nBy the way, filter will remove all of the occurrences matched in the condition (not just the first occurrence) like you can see in the following example\nconst num = 3;\nlet arr = [1, 2, 3, 3, 3, 4];\narr = arr.filter(x => x !== num);\nconsole.log(arr); // [1, 2, 4]\nIn case, you just want to remove the first occurrence, you can use the splice method\nconst num = 3;\nlet arr = [1, 2, 3, 3, 3, 4];\nconst idx = arr.indexOf(num);\narr.splice(idx, idx !== -1 ? 
1 : 0);\nconsole.log(arr); // [1, 2, 3, 3, 4]\n"},{"upvotes":176,"author":"unimplemented","content":"176\nThis provides a predicate instead of a value.\nNOTE: it will update the given array, and return the affected rows.\nUsage\nvar removed = helper.remove(arr, row => row.id === 5 );\n\nvar removed = helper.removeAll(arr, row => row.name.startsWith('BMW'));\nDefinition\nvar helper = {\n // Remove and return the first occurrence\n\n remove: function(array, predicate) {\n for (var i = 0; i < array.length; i++) {\n if (predicate(array[i])) {\n return array.splice(i, 1);\n }\n }\n },\n\n // Remove and return all occurrences\n\n removeAll: function(array, predicate) {\n var removed = [];\n\n for (var i = 0; i < array.length; ) {\n if (predicate(array[i])) {\n removed.push(array.splice(i, 1));\n continue;\n }\n i++;\n }\n return removed;\n },\n};\n"},{"upvotes":152,"author":"unimplemented","content":"152\nYou can do it easily with the filter method:\nfunction remove(arrOriginal, elementToRemove){\n return arrOriginal.filter(function(el){return el !== elementToRemove});\n}\nconsole.log(remove([1, 2, 1, 0, 3, 1, 4], 1));\nThis removes all elements from the array and also works faster than a combination of splice and indexOf.\n"},{"upvotes":140,"author":"unimplemented","content":"140\nJohn Resig posted a good implementation:\n// Array Remove - By John Resig (MIT Licensed)\nArray.prototype.remove = function(from, to) {\n var rest = this.slice((to || from) + 1 || this.length);\n this.length = from < 0 ? this.length + from : from;\n return this.push.apply(this, rest);\n};\nIf you don't want to extend a global object, you can do something like the following, instead:\n// Array Remove - By John Resig (MIT Licensed)\nArray.remove = function(array, from, to) {\n var rest = array.slice((to || from) + 1 || array.length);\n array.length = from < 0 ? array.length + from : from;\n return array.push.apply(array, rest);\n};\nBut the main reason I am posting this is to warn users against the alternative implementation suggested in the comments on that page (Dec 14, 2007):\nArray.prototype.remove = function(from, to) {\n this.splice(from, (to=[0, from || 1, ++to - from][arguments.length]) < 0 ? this.length + to : to);\n return this.length;\n};\nIt seems to work well at first, but through a painful process I discovered it fails when trying to remove the second to last element in an array. For example, if you have a 10-element array and you try to remove the 9th element with this:\nmyArray.remove(8);\nYou end up with an 8-element array. I don't know why, but I confirmed John's original implementation doesn't have this problem.\n"},{"upvotes":136,"author":"unimplemented","content":"136\nYou can use ES6. For example, to delete the value '3' in this case:\nvar array=['1','2','3','4','5','6']\nvar newArray = array.filter((value)=>value!='3');\nconsole.log(newArray);\nOutput :\n[\"1\", \"2\", \"4\", \"5\", \"6\"]\n"},{"upvotes":129,"author":"unimplemented","content":"129\nUnderscore.js can be used to solve issues with multiple browsers. It uses built-in browser methods if present. 
If they are absent, as in the case of older Internet Explorer versions, it uses its own custom methods.\nA simple example to remove elements from an array (from the website):\n_.without([1, 2, 1, 0, 3, 1, 4], 0, 1); // => [2, 3, 4]\n"},{"upvotes":124,"author":"unimplemented","content":"124\nHere are a few ways to remove an item from an array using JavaScript.\nAll the methods described do not mutate the original array, and instead create a new one.\nIf you know the index of an item\nSuppose you have an array, and you want to remove an item in position i.\nOne method is to use slice():\nconst items = ['a', 'b', 'c', 'd', 'e', 'f']\nconst i = 3\nconst filteredItems = items.slice(0, i).concat(items.slice(i+1, items.length))\n\nconsole.log(filteredItems)\nslice() creates a new array with the indexes it receives. We simply create a new array, from start to the index we want to remove, and concatenate another array from the first position following the one we removed to the end of the array.\nIf you know the value\nIn this case, one good option is to use filter(), which offers a more declarative approach:\nconst items = ['a', 'b', 'c', 'd', 'e', 'f']\nconst valueToRemove = 'c'\nconst filteredItems = items.filter(item => item !== valueToRemove)\n\nconsole.log(filteredItems)\nThis uses the ES6 arrow functions. You can use the traditional functions to support older browsers:\nconst items = ['a', 'b', 'c', 'd', 'e', 'f']\nconst valueToRemove = 'c'\nconst filteredItems = items.filter(function(item) {\n return item !== valueToRemove\n})\n\nconsole.log(filteredItems)\nor you can use Babel and transpile the ES6 code back to ES5 to make it more digestible to old browsers, yet write modern JavaScript in your code.\nRemoving multiple items\nWhat if instead of a single item, you want to remove many items?\nLet's find the simplest solution.\nBy index\nYou can just create a function and remove items in series:\nconst items = ['a', 'b', 'c', 'd', 'e', 'f']\n\nconst removeItem = (items, i) =>\n items.slice(0, i).concat(items.slice(i + 1, items.length))\n\nlet filteredItems = removeItem(items, 4)\nfilteredItems = removeItem(filteredItems, 4)\n// [\"a\", \"b\", \"c\", \"d\"]\n\nconsole.log(filteredItems)\nBy value\nYou can search for inclusion inside the callback function:\nconst items = ['a', 'b', 'c', 'd', 'e', 'f']\nconst valuesToRemove = ['c', 'd']\nconst filteredItems = items.filter(item => !valuesToRemove.includes(item))\n// [\"a\", \"b\", \"e\", \"f\"]\n\nconsole.log(filteredItems)\nAvoid mutating the original array\nsplice() (not to be confused with slice()) mutates the original array, and should be avoided.\n(originally posted on my site https://flaviocopes.com/how-to-remove-item-from-array/)\n"},{"upvotes":121,"author":"unimplemented","content":"121\nIf you want a new array with the deleted positions removed, you can always delete the specific element and filter the array. It might need an extension of the array object for browsers that don't implement the filter method, but in the long term it's easier since all you do is this:\nvar my_array = [1, 2, 3, 4, 5, 6];\ndelete my_array[4];\nconsole.log(my_array.filter(function(a){return typeof a !== 'undefined';}));\nIt should display [1, 2, 3, 4, 6].\n"},{"upvotes":106,"author":"unimplemented","content":"106\nCheck out this code. 
It works in every major browser.\nremove_item = function(arr, value) {\n var b = '';\n for (b in arr) {\n if (arr[b] === value) {\n arr.splice(b, 1);\n break;\n }\n }\n return arr;\n};\n\nvar array = [1,3,5,6,5,9,5,3,55]\nvar res = remove_item(array,5);\nconsole.log(res)\n"},{"upvotes":93,"author":"unimplemented","content":"93\nRemoving a particular element/string from an array can be done in a one-liner:\ntheArray.splice(theArray.indexOf(\"stringToRemoveFromArray\"), 1);\nwhere:\ntheArray: the array you want to remove something particular from\nstringToRemoveFromArray: the string you want to be removed and 1 is the number of elements you want to remove.\nNOTE: If \"stringToRemoveFromArray\" is not located in the array, this will remove the last element of the array.\nIt's always good practice to check if the element exists in your array first, before removing it.\nif (theArray.indexOf(\"stringToRemoveFromArray\") >= 0){\n theArray.splice(theArray.indexOf(\"stringToRemoveFromArray\"), 1);\n}\nDepending if you have newer or older version of Ecmascript running on your client's computers:\nvar array=['1','2','3','4','5','6']\nvar newArray = array.filter((value)=>value!='3');\nOR\nvar array = ['1','2','3','4','5','6'];\nvar newArray = array.filter(function(item){ return item !== '3' });\nWhere '3' is the value you want to be removed from the array. The array would then become : ['1','2','4','5','6']\n"},{"upvotes":91,"author":"unimplemented","content":"91\nES10\nThis post summarizes common approaches to element removal from an array as of ECMAScript 2019 (ES10).\n1. General cases\n1.1. Removing Array element by value using .splice()\n| In-place: Yes |\n| Removes duplicates: Yes(loop), No(indexOf) |\n| By value / index: By index |\nIf you know the value you want to remove from an array you can use the splice method. First, you must identify the index of the target item. You then use the index as the start element and remove just one element.\n// With a 'for' loop\nconst arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0];\nfor( let i = 0; i < arr.length; i++){\n if ( arr[i] === 5) {\n arr.splice(i, 1);\n }\n} // => [1, 2, 3, 4, 6, 7, 8, 9, 0]\n\n// With the .indexOf() method\nconst arr = [1, 2, 3, 4, 5, 6, 7, 8, 9, 0];\nconst i = arr.indexOf(5);\narr.splice(i, 1); // => [1, 2, 3, 4, 6, 7, 8, 9, 0]\n1.2. Removing Array element using the .filter() method\n| In-place: No |\n| Removes duplicates: Yes |\n| By value / index: By value |\nThe specific element can be filtered out from the array, by providing a filtering function. Such function is then called for every element in the array.\nconst value = 3\nlet arr = [1, 2, 3, 4, 5, 3]\narr = arr.filter(item => item !== value)\nconsole.log(arr)\n// [ 1, 2, 4, 5 ]\n1.3. Removing Array element by extending Array.prototype\n| In-place: Yes/No (Depends on implementation) |\n| Removes duplicates: Yes/No (Depends on implementation) |\n| By value / index: By index / By value (Depends on implementation) |\nThe prototype of Array can be extended with additional methods. 
Such methods will be then available to use on created arrays.\nNote: Extending prototypes of objects from the standard library of JavaScript (like Array) is considered by some as an antipattern.\n// In-place, removes all, by value implementation\nArray.prototype.remove = function(item) {\n for (let i = 0; i < this.length; i++) {\n if (this[i] === item) {\n this.splice(i, 1);\n }\n }\n}\nconst arr1 = [1,2,3,1];\narr1.remove(1) // arr1 equals [2,3]\n\n// Non-stationary, removes first, by value implementation\nArray.prototype.remove = function(item) {\n const arr = this.slice();\n for (let i = 0; i < this.length; i++) {\n if (arr[i] === item) {\n arr.splice(i, 1);\n return arr;\n }\n }\n return arr;\n}\nlet arr2 = [1,2,3,1];\narr2 = arr2.remove(1) // arr2 equals [2,3,1]\n1.4. Removing Array element using the delete operator\n| In-place: Yes |\n| Removes duplicates: No |\n| By value / index: By index |\nUsing the delete operator does not affect the length property. Nor does it affect the indexes of subsequent elements. The array becomes sparse, which is a fancy way of saying the deleted item is not removed but becomes undefined.\nconst arr = [1, 2, 3, 4, 5, 6];\ndelete arr[4]; // Delete element with index 4\nconsole.log( arr ); // [1, 2, 3, 4, undefined, 6]\nThe delete operator is designed to remove properties from JavaScript objects, which arrays are objects.\n1.5. Removing Array element using Object utilities (>= ES10)\n| In-place: No |\n| Removes duplicates: Yes |\n| By value / index: By value |\nES10 introduced Object.fromEntries, which can be used to create the desired Array from any Array-like object and filter unwanted elements during the process.\nconst object = [1,2,3,4];\nconst valueToRemove = 3;\nconst arr = Object.values(Object.fromEntries(\n Object.entries(object)\n .filter(([ key, val ]) => val !== valueToRemove)\n));\nconsole.log(arr); // [1,2,4]\n2. Special cases\n2.1 Removing element if it's at the end of the Array\n2.1.1. Changing Array length\n| In-place: Yes |\n| Removes duplicates: No |\n| By value / index: N/A |\nJavaScript Array elements can be removed from the end of an array by setting the length property to a value less than the current value. Any element whose index is greater than or equal to the new length will be removed.\nconst arr = [1, 2, 3, 4, 5, 6];\narr.length = 5; // Set length to remove element\nconsole.log( arr ); // [1, 2, 3, 4, 5]\n2.1.2. Using .pop() method\n| In-place: Yes |\n| Removes duplicates: No |\n| By value / index: N/A |\nThe pop method removes the last element of the array, returns that element, and updates the length property. The pop method modifies the array on which it is invoked, This means unlike using delete the last element is removed completely and the array length reduced.\nconst arr = [1, 2, 3, 4, 5, 6];\narr.pop(); // returns 6\nconsole.log( arr ); // [1, 2, 3, 4, 5]\n2.2. Removing element if it's at the beginning of the Array\n| In-place: Yes |\n| Removes duplicates: No |\n| By value / index: N/A |\nThe .shift() method works much like the pop method except it removes the first element of a JavaScript array instead of the last. When the element is removed the remaining elements are shifted down.\nconst arr = [1, 2, 3, 4];\narr.shift(); // returns 1\nconsole.log( arr ); // [2, 3, 4]\n2.3. 
Removing element if it's the only element in the Array\n| In-place: Yes |\n| Removes duplicates: N/A |\n| By value / index: N/A |\nThe fastest technique is to set an array variable to an empty array.\nlet arr = [1];\narr = []; //empty array\nAlternatively technique from 2.1.1 can be used by setting length to 0.\n"},{"upvotes":86,"author":"unimplemented","content":"86\nYou can use lodash _.pull (mutate array), _.pullAt (mutate array) or _.without (does't mutate array),\nvar array1 = ['a', 'b', 'c', 'd']\n_.pull(array1, 'c')\nconsole.log(array1) // ['a', 'b', 'd']\n\nvar array2 = ['e', 'f', 'g', 'h']\n_.pullAt(array2, 0)\nconsole.log(array2) // ['f', 'g', 'h']\n\nvar array3 = ['i', 'j', 'k', 'l']\nvar newArray = _.without(array3, 'i') // ['j', 'k', 'l']\nconsole.log(array3) // ['i', 'j', 'k', 'l']\n"},{"upvotes":85,"author":"unimplemented","content":"85\nES6 & without mutation: (October 2016)\nconst removeByIndex = (list, index) =>\n [\n ...list.slice(0, index),\n ...list.slice(index + 1)\n ];\n \noutput = removeByIndex([33,22,11,44],1) //=> [33,11,44]\n \nconsole.log(output)\n"},{"upvotes":78,"author":"unimplemented","content":"78\nPerformance\nToday (2019-12-09) I conduct performance tests on macOS v10.13.6 (High Sierra) for chosen solutions. I show delete (A), but I do not use it in comparison with other methods, because it left empty space in the array.\nThe conclusions\nthe fastest solution is array.splice (C) (except Safari for small arrays where it has the second time)\nfor big arrays, array.slice+splice (H) is the fastest immutable solution for Firefox and Safari; Array.from (B) is fastest in Chrome\nmutable solutions are usually 1.5x-6x faster than immutable\nfor small tables on Safari, surprisingly the mutable solution (C) is slower than the immutable solution (G)\nDetails\nIn tests, I remove the middle element from the array in different ways. The A, C solutions are in-place. The B, D, E, F, G, H solutions are immutable.\nResults for an array with 10 elements\nIn Chrome the array.splice (C) is the fastest in-place solution. The array.filter (D) is the fastest immutable solution. The slowest is array.slice (F). You can perform the test on your machine here.\nResults for an array with 1.000.000 elements\nIn Chrome the array.splice (C) is the fastest in-place solution (the delete (C) is similar fast - but it left an empty slot in the array (so it does not perform a 'full remove')). The array.slice-splice (H) is the fastest immutable solution. The slowest is array.filter (D and E). You can perform the test on your machine here.\nComparison for browsers: Chrome v78.0.0, Safari v13.0.4, and Firefox v71.0.0\n"},{"upvotes":62,"author":"unimplemented","content":"62\nOK, for example you have the array below:\nvar num = [1, 2, 3, 4, 5];\nAnd we want to delete number 4. 
You can simply use the below code:\nnum.splice(num.indexOf(4), 1); // num will be [1, 2, 3, 5];\nIf you will be reusing this, you can write a reusable function and attach it to the native Array prototype, like below:\nArray.prototype.remove = Array.prototype.remove || function(x) {\n const i = this.indexOf(x);\n if(i===-1)\n return;\n this.splice(i, 1); // num.remove(5) === [1, 2, 3];\n}\nBut what if you have the below array instead, with several 5s in it?\nvar num = [5, 6, 5, 4, 5, 1, 5];\nWe need a loop to check them all, but an easier and more efficient way is using built-in JavaScript functions, so we write a function which uses a filter like below instead:\nconst _removeValue = (arr, x) => arr.filter(n => n!==x);\n//_removeValue([1, 2, 3, 4, 5, 5, 6, 5], 5) // Returns [1, 2, 3, 4, 6]\nAlso there are third-party libraries which help you to do this, like Lodash or Underscore. For more information, look at lodash _.pull, _.pullAt or _.without.\n"},{"upvotes":60,"author":"unimplemented","content":"60\nI'm pretty new to JavaScript and needed this functionality. I merely wrote this:\nfunction removeFromArray(array, item, index) {\n while((index = array.indexOf(item)) > -1) {\n array.splice(index, 1);\n }\n}\nThen when I want to use it:\n//Set-up some dummy data\nvar dummyObj = {name:\"meow\"};\nvar dummyArray = [dummyObj, \"item1\", \"item1\", \"item2\"];\n\n//Remove the dummy data\nremoveFromArray(dummyArray, dummyObj);\nremoveFromArray(dummyArray, \"item2\");\nOutput - As expected. [\"item1\", \"item1\"]\nYou may have different needs than I, so you can easily modify it to suit them. I hope this helps someone.\n"},{"upvotes":58,"author":"unimplemented","content":"58\nI want to answer based on ECMAScript 6. Assume you have an array like below:\nlet arr = [1,2,3,4];\nIf you want to delete at a specific index like 2, write the below code:\narr.splice(2, 1); //=> arr became [1,2,4]\nBut if you want to delete a specific item like 3 and you don't know its index, do like below:\narr = arr.filter(e => e !== 3); //=> arr became [1,2,4]\nHint: use an arrow function for the filter callback (or remember to return a value); otherwise you will get an empty array.\n"},{"upvotes":54,"author":"unimplemented","content":"54\nUpdate: This method is recommended only if you cannot use ECMAScript 2015 (formerly known as ES6). If you can use it, other answers here provide much neater implementations.\nThis gist here will solve your problem, and also deletes all occurrences of the argument instead of just 1 (or a specified value).\nArray.prototype.destroy = function(obj){\n // Return null if no objects were found and removed\n var destroyed = null;\n\n for(var i = 0; i < this.length; i++){\n\n // Use while-loop to find adjacent equal objects\n while(this[i] === obj){\n\n // Remove this[i] and store it within destroyed\n destroyed = this.splice(i, 1)[0];\n }\n }\n\n return destroyed;\n}\nUsage:\nvar x = [1, 2, 3, 3, true, false, undefined, false];\n\nx.destroy(3); // => 3\nx.destroy(false); // => false\nx; // => [1, 2, true, undefined]\n\nx.destroy(true); // => true\nx.destroy(undefined); // => undefined\nx; // => [1, 2]\n\nx.destroy(3); // => null\nx; // => [1, 2]\n"},{"upvotes":54,"author":"unimplemented","content":"54\nYou should never mutate your array, as this is against the functional programming pattern. 
You can create a new array, without referencing the one you want to change, using the ECMAScript 6 method filter:\nvar myArray = [1, 2, 3, 4, 5, 6];\nSuppose you want to remove 5 from the array; you can simply do it like this:\nmyArray = myArray.filter(value => value !== 5);\nThis will give you a new array without the value you wanted to remove. So the result will be:\n [1, 2, 3, 4, 6]; // 5 has been removed from this array\nFor further understanding you can read the MDN documentation on Array.filter.\n"},{"upvotes":53,"author":"unimplemented","content":"53\nIf you have complex objects in the array, you can use filters in situations where $.inArray or array.splice is not as easy to use - especially if the objects are perhaps shallow in the array.\nE.g. if you have an object with an id field and you want the object removed from an array:\nthis.array = this.array.filter(function(element, i) {\n return element.id !== idToRemove;\n});\n"},{"upvotes":42,"author":"unimplemented","content":"42\nYou can do a backward loop to make sure not to screw up the indexes, if there are multiple instances of the element.\nvar myElement = \"chocolate\";\nvar myArray = ['chocolate', 'poptart', 'poptart', 'poptart', 'chocolate', 'poptart', 'poptart', 'chocolate'];\n\n/* Important code */\nfor (var i = myArray.length - 1; i >= 0; i--) {\n if (myArray[i] == myElement) myArray.splice(i, 1);\n}\nconsole.log(myArray);\n"},{"upvotes":38,"author":"unimplemented","content":"38\nA more modern, ECMAScript 2015 (formerly known as Harmony or ES 6) approach. Given:\nconst items = [1, 2, 3, 4];\nconst index = 2;\nThen:\nitems.filter((x, i) => i !== index);\nYielding:\n[1, 2, 4]\nYou can use Babel and a polyfill service to ensure this is well supported across browsers.\n"},{"upvotes":19121,"author":"unimplemented","content":"19121\nTo rename the current branch:\ngit branch -m <newname>\nTo rename a branch while pointed to any branch:\ngit branch -m <oldname> <newname>\n-m is short for --move.\nTo push the local branch and reset the upstream branch:\ngit push origin -u <newname>\nTo delete the remote branch:\ngit push origin --delete <oldname>\nTo create a git rename alias:\ngit config --global alias.rename 'branch -m'\nOn Windows or another case-insensitive filesystem, use -M if there are only capitalization changes in the name. Otherwise, Git will throw a \"branch already exists\" error.\ngit branch -M <newname>\n"},{"upvotes":643,"author":"unimplemented","content":"643\nYou can rename a local Git branch using the following command:\ngit branch -m old_branch_name new_branch_name\nKeep in mind that when you rename a branch, it still maintains its association with the old upstream branch if there was one.\nTo push changes to the master branch after renaming your local branch to new_branch_name, use the following command:\ngit push origin new_branch_name:master\nWith this command, your changes will be pushed to the master branch on the remote repository. However, your local branch will still be named new_branch_name.\nFor more details, see: How to rename your local branch name in Git.\n"},{"upvotes":470,"author":"unimplemented","content":"470\nTo rename your current branch:\ngit branch -m <newname>\n"},{"upvotes":405,"author":"unimplemented","content":"405\nHere are the steps to rename the branch:\nSwitch to the branch which needs to be renamed\ngit branch -m <new_name>\ngit push origin :<old_name>\ngit push origin <new_name>:refs/heads/<new_name>\nEDIT (12/01/2017): Make sure you run command git status and check that the newly created branch is pointing to its own ref and not the older one. 
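For example, one way to inspect which upstream each local branch tracks (the branch names and hash shown are illustrative):\ngit branch -vv\n# * new_branch_name abc1234 [origin/old_branch_name] last commit message <- stale upstream\n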
If you find the reference to the older branch, you need to unset the upstream using:\ngit branch --unset-upstream\n"},{"upvotes":308,"author":"unimplemented","content":"308\nRenaming a branch is useful once your branch is finished: new stuff is coming, and you want to develop in the same branch instead of deleting it and creating a new one.\nFrom my experience, to rename a local and remote branch in Git you should do the following steps.\nQuoting from Multiple States - Rename a local and remote branch in git\n1. Rename your local branch\nIf you are on the branch you want to rename:\ngit branch -m new-name\nIf you are on a different branch:\ngit branch -m old-name new-name\n2. Delete the old-name remote branch and push the new-name local branch\ngit push origin :old-name new-name\n3. Reset the upstream branch for the new-name local branch\ngit push origin -u new-name\n"},{"upvotes":143,"author":"unimplemented","content":"143\nThe answers so far have been correct, but here is some additional information:\nOne can safely rename a branch with '-m' (move), but one has to be careful with '-M', because it forces the rename, even if there is an existing branch with the same name already. Here is the excerpt from the 'git-branch' man page:\nWith a -m or -M option, <oldbranch> will be renamed to <newbranch>. If <oldbranch> had a corresponding reflog, it is renamed to match <newbranch>, and a reflog entry is created to remember the branch renaming. If <newbranch> exists, -M must be used to force the rename to happen.\n"},{"upvotes":124,"author":"unimplemented","content":"124\nTo rename your current branch to a new branch name:\ngit branch -m <new_name>\nThis will set the new name for the current branch you are working with.\nTo rename another branch:\ngit branch -m <old_name> <new_name>\nHere you have to provide the old branch name and the new branch name.\n"},{"upvotes":120,"author":"unimplemented","content":"120\n1. Rename\nIf it is your current branch, just do\ngit branch -m new_name\nIf it is another branch you want to rename\ngit branch -m old_name new_name\n2. Track a new remote branch\n- If your branch was pushed, then after renaming you need to delete it from the remote Git repository and ask your new local to track a new remote branch:\ngit push origin :old_name\ngit push --set-upstream origin new_name\n"},{"upvotes":93,"author":"unimplemented","content":"93\nI foolishly named a branch starting with a hyphen, and then checked out master. I didn't want to delete my branch, I had work in it.\nNeither of these worked:\ngit checkout -dumb-name\ngit checkout -- -dumb-name\nQuoting with \", ' or \\ didn't help either. git branch -m doesn't work.\nHere's how I finally fixed it. Go into your working copy's .git/refs/heads, find the filename \"-dumb-name\", get the hash of the branch. 
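For example (a sketch; the hash is made up, and note the ref may instead live in .git/packed-refs):\ncat .git/refs/heads/-dumb-name\n1a2b3c4d5e6f7a8b9c0d1e2f3a4b5c6d7e8f9a0b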
Then this will check it out, make a new branch with a sane name, and delete the old one.\ngit checkout {hash}\ngit checkout -b brilliant-name\ngit branch -d -- -dumb-name\n"},{"upvotes":91,"author":"unimplemented","content":"91\nUpdate 2024\nBefore we begin, make sure you’ve selected the branch you want to rename:\ngit checkout old-name\nIf you want to see all of your local branches, use the following command:\ngit branch --list\nWhen you’re all clear, follow these steps:\nUsing the Git rename branch command will require you to add an -m option to your command:\ngit branch -m new-name\nYou can also rename a local branch from another branch by using the following two commands:\ngit checkout master\n\ngit branch -m old-name new-name\nLastly, this command will list all — both local and remote — branches to verify that it has been renamed:\ngit branch -a\nAlthough it isn’t possible to rename a remote branch directly, the process of renaming one involves these two easy steps:\n1. To start, you need to rename the local branch by following the previous steps.\n2. Then delete the old branch and push the new one. You can do this easily with the following command:\n git push origin :old-name new-name\nReset the upstream branch for your new local branch, and you will be all set:\ngit push origin -u new-name\nIn the end, as Nicolas Castro explained in the comments, reset the upstream by running these two commands:\ngit branch --unset-upstream\n\ngit push --set-upstream origin new-name\n"},{"upvotes":88,"author":"unimplemented","content":"88\nJust three steps to replicate the change in name on the remote as well as on GitHub:\nStep 1 git branch -m old_branchname new_branchname\nStep 2 git push origin :old_branchname new_branchname\nStep 3 git push --set-upstream origin new_branchname\n"},{"upvotes":83,"author":"unimplemented","content":"83\nTo rename a branch locally:\ngit branch -m [old-branch] [new-branch]\nNow you'll have to propagate these changes on your remote server as well.\nTo delete the old branch on the remote:\ngit push origin :[old-branch]\nTo push the newly created branch:\ngit push origin [new-branch]\n"},{"upvotes":71,"author":"unimplemented","content":"71\nTrying to answer specifically the question (at least the title).\nYou can also rename the local branch, but keep tracking the old name on the remote.\ngit branch -m old_branch new_branch\ngit push --set-upstream origin new_branch:old_branch\nNow, when you run git push, the remote old_branch ref is updated with your local new_branch.\nYou have to know and remember this configuration. But it can be useful if you don't have the choice for the remote branch name, but you don't like it (oh, I mean, you've got a very good reason not to like it!) and prefer a clearer name for your local branch.\nPlaying with the fetch configuration, you can even rename the local remote-reference, i.e., have a refs/remotes/origin/new_branch ref pointing to the branch that is in fact old_branch on origin. However, I highly discourage this, for your own sanity.\n"},{"upvotes":50,"author":"unimplemented","content":"50\nRename the branch using this command:\ngit branch -m [old_branch_name] [new_branch_name]\n-m: It renames/moves the branch. 
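For instance, here is roughly what the failure looks like when the target name is already taken (branch names are illustrative):\ngit branch -m feature-a feature-b\nfatal: A branch named 'feature-b' already exists.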
If a branch with the new name already exists, you will get an error like the one above.\nIf it does, and you still want to rename over that branch, use:\n git branch -M [old_branch_name] [new_branch_name]\nFor more information, use one of these commands in the terminal:\ngit branch --help\nor\nman git-branch\n"},{"upvotes":46,"author":"unimplemented","content":"46\nAdvanced Git users can rename manually using:\nRename the old branch under .git/refs/heads to the new name\n\nRename the old branch under .git/logs/refs/heads to the new name\n\nUpdate the .git/HEAD to point to your new branch name\n"},{"upvotes":43,"author":"unimplemented","content":"43\nRename your local branch.\nIf you are on the branch you want to rename:\ngit branch -m new-name\nIf you are on a different branch:\ngit branch -m old-name new-name\nDelete the old-name remote branch and push the new-name local branch.\ngit push origin :old-name new-name\nReset the upstream branch for the new-name local branch. Switch to the branch and then:\ngit push origin -u new-name\nOr for a fast way to do that, you can use these 3 steps:\n# Rename branch locally\ngit branch -m old_branch new_branch \n# Delete the old remote branch\ngit push origin :old_branch \n# Push the new branch, set local branch to track the new remote\ngit push --set-upstream origin new_branch \nReference: https://www.w3docs.com/snippets/git/how-to-rename-git-local-and-remote-branches.html\n"},{"upvotes":34,"author":"unimplemented","content":"34\nHere are three steps: commands that you can call inside your terminal to change the branch name.\ngit branch -m old_branch new_branch # Rename branch locally\ngit push origin :old_branch # Delete the old branch\ngit push --set-upstream origin new_branch # Push the new branch, set local branch to track the new remote\nIf you need more: step-by-step, How To Change Git Branch Name is a good article about that.\n"},{"upvotes":31,"author":"unimplemented","content":"31\nProbably, as mentioned by others, this will be a case mismatch in branch naming.\nIf you have such a situation, I can guess that you're on Windows, which will also lead you to:\n$ git branch -m CaseSensitive casesensitive\nfatal: A branch named 'casesensitive' already exists.\nThen you have to do an intermediate step:\n$ git branch -m temporary\n$ git branch -m casesensitive\nNothing more.\n"},{"upvotes":28,"author":"unimplemented","content":"28\nChanging the branch locally is quite easy...\nIf you are on the branch you want to change the name for, simply do this:\ngit branch -m my_new_branch\nOtherwise, if you are on master or any other branch other than the one you'd like to change the name of, simply do:\ngit branch -m my_old_branch my_new_branch\n(The original answer included a screenshot showing this in action on the command line, run from the master branch.)\n"},{"upvotes":26,"author":"unimplemented","content":"26\nTo rename the current branch (except for detached HEAD state) you can also use this alias:\n[alias]\n mvh = !sh -c 'git branch -m `git rev-parse --abbrev-ref HEAD` $1'\n"},{"upvotes":26,"author":"unimplemented","content":"26\nSince you do not want to push the branch to a remote server, this example will be useful:\nLet's say you have an existing branch called \"my-hot-feature,\" and you want to rename it to \"feature-15.\"\nFirst, you want to change your local branch. 
This couldn't be easier:\ngit branch -m my-hot-feature feature-15\nFor more information, you can visit Locally and Remotely Renaming a Branch in Git.\n"},{"upvotes":23,"author":"unimplemented","content":"23\nIf you are willing to use SourceTree (which I strongly recommend), you can right-click your branch and choose 'Rename'.\n"},{"upvotes":22,"author":"unimplemented","content":"22\nAnother option is not to use the command line at all. Git GUI clients such as SourceTree take away much of the syntactical learning curve / pain that causes questions such as this one to be amongst the most viewed on Stack Overflow.\nIn SourceTree, right-click on any local branch in the \"Branches\" pane on the left and select \"Rename ...\".\n"},{"upvotes":22,"author":"unimplemented","content":"22\nA simple way to do it:\ngit branch -m old_branch new_branch # Rename branch locally\ngit push origin :old_branch # Delete the old branch\ngit push --set-upstream origin new_branch # Push the new branch, set local branch to track the new remote\nFor more, see this.\n"},{"upvotes":17,"author":"unimplemented","content":"17\nGit version 2.9.2\nIf you want to change the name of the local branch you are on:\ngit branch -m new_name\nIf you want to change the name of a different branch:\ngit branch -m old_name new_name\nIf you want to change the name of a different branch to a name that already exists:\ngit branch -M old_name new_name_that_already_exists\nNote: The last command is destructive and will rename your branch, but you will lose the old branch with that name and those commits because branch names must be unique.\n"},{"upvotes":15,"author":"unimplemented","content":"15\nIf you want to change the name of the current branch, run:\ngit branch -m [old_branch] [new_branch]\nIf you want to delete the old remote branch, run:\ngit push origin :[old_branch]\nIf you want to delete the old remote branch and create a new remote branch, run:\ngit push origin :[old_branch] [new_branch]\n"},{"upvotes":13,"author":"unimplemented","content":"13\nHere is this procedure in more detail.\nHow to rename a local branch in Git\nTo rename the current branch, make sure you’ve checked out and are using the branch you want to rename.\ngit checkout oldbranch\nAnd then\ngit branch -m newbranch \nIf you want to, you can rename a branch when you’re working in another branch.\ngit branch -m oldbranch newbranch\nHow to rename a remote branch in Git\nIf others use this branch and commit to it, you should pull it before renaming it locally. 
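For example (a sketch; oldbranch stands in for the real branch name):\ngit checkout oldbranch\ngit pull origin oldbranch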
This ensures that your local repository is updated and that changes made by other users will not be lost.\nFirst, we need to delete oldbranch from the remote repository, and push newbranch to the remote.\ngit push origin --delete oldbranch\nNow we’ll push the new one to the remote, by using the -u (set upstream) option.\ngit push origin -u newbranch\n"},{"upvotes":11,"author":"unimplemented","content":"11\nActually you have three steps, because the local branch has a duplicate on the server: one step for the local branch and two steps for the server:\nRename local: just use the following command to rename your current branch, even while you have it checked out:\ngit branch -m <new_name>\nDelete the server one: use the following command to delete the old-name branch on the server:\ngit push origin :<old_name>\nPush the new one: now it's time to push the new branch, named <new_name>, to the server:\ngit push origin -u <new_name>\n"},{"upvotes":9,"author":"unimplemented","content":"9\nGit branch rename can be done by using:\ngit branch -m oldBranch newBranch\ngit branch -M oldBranch ExistingBranch\nThe difference between -m and -M:\n-m: if you try to rename your branch to an existing branch name using -m, it will raise an error saying that the branch already exists. You need to give a unique name.\nBut,\n-M: this will force the rename to the given name, even if it exists. So an existing branch will be overwritten entirely by it...\nHere is a Git terminal example,\nmohideen@dev:~/project/myapp/sunithamakeup$ git branch\n master\n master0\n new_master\n test\n* test1\nmohideen@dev:~/project/myapp/sunithamakeup$ git branch -m test1 test\nfatal: A branch named 'test' already exists.\nmohideen@dev:~/project/myapp/sunithamakeup$ git branch -M test1 test\nmohideen@dev:~/project/myapp/sunithamakeup$ git branch\n master\n master0\n new_master\n* test\nmohideen@dev:~/project/myapp/sunithamakeup$\n"},{"upvotes":9,"author":"unimplemented","content":"9\nAll of the previous answers are talking about git branch -m. Of course, it's easy to operate, but it may be a little hard for me to remember another Git command. So I tried to get the work done with a command I was familiar with. Yeah, you may have guessed it.\nI use git checkout -b <new_branch_name>. And if you don't want to keep the old branch now, you can execute git branch -D <old_branch_name> to remove it.\nI know it may be a little tedious, but it's easier to understand and remember. I hope it's helpful for you.\n"},{"upvotes":11563,"author":"unimplemented","content":"11563\nFor JSON text:\napplication/json\nThe MIME media type for JSON text is application/json. The default encoding is UTF-8. 
(Source: RFC 4627)\nFor JSONP (runnable JavaScript) with callback:\napplication/javascript\nHere are some blog posts that were mentioned in the relevant comments:\nWhy you shouldn't use text/html for JSON\nInternet Explorer sometimes has issues with application/json\nA rather complete list of Mimetypes and what to use them for\nThe official mime type list at IANA from @gnrfan's answer below\n"},{"upvotes":1818,"author":"unimplemented","content":"1818\nIANA has registered the official MIME Type for JSON as application/json.\nWhen asked about why not text/json, Crockford seems to have said JSON is not really JavaScript nor text, and also that IANA was more likely to hand out application/* than text/*.\nMore resources:\nMedia Types\nRequest for Comments 4627\nbluesmoon: JSON has a type\n"},{"upvotes":1031,"author":"unimplemented","content":"1031\nFor JSON:\nContent-Type: application/json\nFor JSON-P:\nContent-Type: application/javascript\n"},{"upvotes":706,"author":"unimplemented","content":"706\nOf course, the correct MIME media type for JSON is application/json, but it's necessary to realize what type of data is expected in your application.\nFor example, I use Java Ext GWT and the server response must go as text/html but contains JSON data.\nClient side, Ext GWT form listener\nuploadForm.getForm().addListener(new FormListenerAdapter()\n{\n @Override\n public void onActionFailed(Form form, int httpStatus, String responseText) \n {\n MessageBox.alert(\"Error\");\n }\n\n @Override\n public void onActionComplete(Form form, int httpStatus, String responseText) \n {\n MessageBox.alert(\"Success\");\n }\n});\nIf the application/json response type is used instead, the browser suggests that I save the file.\nServer side source code snippet using Spring MVC\nreturn new AbstractUrlBasedView() \n{\n @SuppressWarnings(\"unchecked\")\n @Override\n protected void renderMergedOutputModel(Map model, HttpServletRequest request,\n HttpServletResponse response) throws Exception \n {\n response.setContentType(\"text/html\");\n response.getWriter().write(json);\n }\n};\n"},{"upvotes":521,"author":"unimplemented","content":"521\nJSON:\nResponse is dynamically generated data, according to the query parameters passed in the URL.\nExample:\n{ \"Name\": \"Foo\", \"Id\": 1234, \"Rank\": 7 }\nContent-Type: application/json\nJSON-P:\nJSON with padding. Response is JSON data, with a function call wrapped around it.\nExample:\nfunctionCall({\"Name\": \"Foo\", \"Id\": 1234, \"Rank\": 7});\nContent-Type: application/javascript\n"},{"upvotes":440,"author":"unimplemented","content":"440\nIf you are using Ubuntu or Debian and you serve .json files through Apache, you might want to serve the files with the correct content type. I am doing this primarily because I want to use the Firefox extension JSONView.\nThe Apache module mod_mime will help to do this easily. However, with Ubuntu you need to edit the file /etc/mime.types and add the line\napplication/json json\nThen restart Apache:\nsudo service apache2 restart\n"},{"upvotes":418,"author":"unimplemented","content":"418\nIf you're calling ASP.NET Web Services from the client-side you have to use application/json for it to work. 
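For instance, a raw request with the header set might look like this (a sketch; the URL and payload are made up):\ncurl -X POST -H \"Content-Type: application/json\" -d '{\"Name\":\"Foo\"}' https://example.com/MyService.asmx/MyMethod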
I believe this is the same for the jQuery and Ext frameworks.\n"},{"upvotes":333,"author":"unimplemented","content":"333\nThe right content type for JSON is application/json UNLESS you're using JSONP, also known as JSON with Padding, which is actually JavaScript and so the right content type would be application/javascript.\n"},{"upvotes":325,"author":"unimplemented","content":"325\nThere is no doubt that application/json is the best MIME type for a JSON response.\nBut I had some experience where I had to use application/x-javascript because of some compression issues. My hosting environment is shared hosting with GoDaddy. They do not allow me to change server configurations. I had added the following code to my web.config file for compressing responses.\n[the <httpCompression> configuration block was lost when this answer was scraped]\nBy using this, the .aspx pages were compressed with g-zip, but JSON responses were not. I added\n<add mimeType=\"application/json\" enabled=\"true\"/>\nin the static and dynamic types sections. But this does not compress JSON responses at all.\nAfter that I removed this newly added type and added\n<add mimeType=\"application/x-javascript\" enabled=\"true\"/>\nin both the static and dynamic types sections, and changed the response type in\n.ashx (asynchronous handler) to\napplication/x-javascript\nAnd now I found that my JSON responses were compressed with g-zip. So I personally recommend using\napplication/x-javascript\nonly if you want to compress your JSON responses on a shared hosting environment. Because in shared hosting, they do not allow you to change IIS configurations.\n"},{"upvotes":285,"author":"unimplemented","content":"285\nOnly when using application/json as the MIME type do I have the following (as of November 2011, with the most recent versions of Chrome and Firefox with Firebug):\nNo more warnings from Chrome when the JSON is loaded from the server.\nFirebug will add a tab to the response showing you the JSON data formatted. If the MIME type is different, it will just show up as 'Response content'.\n"},{"upvotes":269,"author":"unimplemented","content":"269\nNot everything works for content type application/json.\nIf you are using an Ext JS form submit to upload a file, be aware that the server response is parsed by the browser to create the document for the